DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/Metadata.h"
60#include "llvm/Support/Debug.h"
68#include <algorithm>
69#include <cassert>
70#include <cstdint>
71#include <functional>
72#include <iterator>
73#include <optional>
74#include <string>
75#include <tuple>
76#include <utility>
77#include <variant>
78
79#include "MatchContext.h"
80
81using namespace llvm;
82using namespace llvm::SDPatternMatch;
83
84#define DEBUG_TYPE "dagcombine"
85
86STATISTIC(NodesCombined , "Number of dag nodes combined");
87STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
88STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
89STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
90STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
91STATISTIC(SlicedLoads, "Number of load sliced");
92STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
93
94DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
95 "Controls whether a DAG combine is performed for a node");
96
97static cl::opt<bool>
98CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
99 cl::desc("Enable DAG combiner's use of IR alias analysis"));
100
101static cl::opt<bool>
102UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
103 cl::desc("Enable DAG combiner's use of TBAA"));
104
105#ifndef NDEBUG
106static cl::opt<std::string>
107CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
108 cl::desc("Only use DAG-combiner alias analysis in this"
109 " function"));
110#endif
111
112/// Hidden option to stress test load slicing, i.e., when this option
113/// is enabled, load slicing bypasses most of its profitability guards.
114static cl::opt<bool>
115StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
116 cl::desc("Bypass the profitability model of load slicing"),
117 cl::init(false));
118
119static cl::opt<bool>
120 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
121 cl::desc("DAG combiner may split indexing from loads"));
122
123static cl::opt<bool>
124 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
125 cl::desc("DAG combiner enable merging multiple stores "
126 "into a wider store"));
127
128static cl::opt<unsigned> TokenFactorInlineLimit(
129    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
130 cl::desc("Limit the number of operands to inline for Token Factors"));
131
132static cl::opt<unsigned> StoreMergeDependenceLimit(
133    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
134 cl::desc("Limit the number of times for the same StoreNode and RootNode "
135 "to bail out in store merging dependence check"));
136
137static cl::opt<bool> EnableReduceLoadOpStoreWidth(
138    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
139 cl::desc("DAG combiner enable reducing the width of load/op/store "
140 "sequence"));
141static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
142    "combiner-reduce-load-op-store-width-force-narrowing-profitable",
143 cl::Hidden, cl::init(false),
144 cl::desc("DAG combiner force override the narrowing profitable check when "
145 "reducing the width of load/op/store sequences"));
146
147static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
148    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
149 cl::desc("DAG combiner enable load/<replace bytes>/store with "
150 "a narrower store"));
151
152static cl::opt<bool> EnableVectorFCopySignExtendRound(
153    "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
154 cl::desc(
155 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
156namespace {
157
158 class DAGCombiner {
159 SelectionDAG &DAG;
160 const TargetLowering &TLI;
161 const SelectionDAGTargetInfo *STI;
163 CodeGenOptLevel OptLevel;
164 bool LegalDAG = false;
165 bool LegalOperations = false;
166 bool LegalTypes = false;
167 bool ForCodeSize;
168 bool DisableGenericCombines;
169
170 /// Worklist of all of the nodes that need to be simplified.
171 ///
172 /// This must behave as a stack -- new nodes to process are pushed onto the
173 /// back and when processing we pop off of the back.
174 ///
175 /// The worklist will not contain duplicates but may contain null entries
176 /// due to nodes being deleted from the underlying DAG. For fast lookup and
177 /// deduplication, the index of the node in this vector is stored in the
178 /// node in SDNode::CombinerWorklistIndex.
179    SmallVector<SDNode *, 64> Worklist;
180
181 /// This records all nodes attempted to be added to the worklist since we
182    /// considered a new worklist entry. Since we do not add duplicate nodes
183    /// to the worklist, this is different from the tail of the worklist.
184    SmallSetVector<SDNode *, 32> PruningList;
185
186 /// Map from candidate StoreNode to the pair of RootNode and count.
187 /// The count is used to track how many times we have seen the StoreNode
188 /// with the same RootNode bail out in dependence check. If we have seen
189 /// the bail out for the same pair many times over a limit, we won't
190 /// consider the StoreNode with the same RootNode as store merging
191 /// candidate again.
192    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
193
194 // BatchAA - Used for DAG load/store alias analysis.
195 BatchAAResults *BatchAA;
196
197 /// This caches all chains that have already been processed in
198 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
199 /// stores candidates.
200 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
201
202    /// When an instruction is simplified, add all users of the instruction to
203    /// the worklist because they might now be simplified further.
204 void AddUsersToWorklist(SDNode *N) {
205 for (SDNode *Node : N->users())
206 AddToWorklist(Node);
207 }
208
209    /// Convenient shorthand to add a node and all of its users to the worklist.
210 void AddToWorklistWithUsers(SDNode *N) {
211 AddUsersToWorklist(N);
212 AddToWorklist(N);
213 }
214
215 // Prune potentially dangling nodes. This is called after
216 // any visit to a node, but should also be called during a visit after any
217 // failed combine which may have created a DAG node.
218 void clearAddedDanglingWorklistEntries() {
219 // Check any nodes added to the worklist to see if they are prunable.
220 while (!PruningList.empty()) {
221 auto *N = PruningList.pop_back_val();
222 if (N->use_empty())
223 recursivelyDeleteUnusedNodes(N);
224 }
225 }
226
227 SDNode *getNextWorklistEntry() {
228 // Before we do any work, remove nodes that are not in use.
229 clearAddedDanglingWorklistEntries();
230 SDNode *N = nullptr;
231 // The Worklist holds the SDNodes in order, but it may contain null
232 // entries.
233 while (!N && !Worklist.empty()) {
234 N = Worklist.pop_back_val();
235 }
236
237 if (N) {
238 assert(N->getCombinerWorklistIndex() >= 0 &&
239 "Found a worklist entry without a corresponding map entry!");
240 // Set to -2 to indicate that we combined the node.
241 N->setCombinerWorklistIndex(-2);
242 }
243 return N;
244 }
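    // A note on the worklist index kept in each node: a value >= 0 is the
    // node's current position in Worklist, -1 means the node is not in the
    // worklist, and -2 means it has already been popped off and combined
    // (see AddToWorklist and removeFromWorklist below).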
245
246 /// Call the node-specific routine that folds each particular type of node.
247    SDValue visit(SDNode *N);
248
249 public:
250 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
251 : DAG(D), TLI(D.getTargetLoweringInfo()),
252 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
253 BatchAA(BatchAA) {
254 ForCodeSize = DAG.shouldOptForSize();
255 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
256
257 MaximumLegalStoreInBits = 0;
258 // We use the minimum store size here, since that's all we can guarantee
259 // for the scalable vector types.
260 for (MVT VT : MVT::all_valuetypes())
261 if (EVT(VT).isSimple() && VT != MVT::Other &&
262 TLI.isTypeLegal(EVT(VT)) &&
263 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
264 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
265 }
266
267 void ConsiderForPruning(SDNode *N) {
268 // Mark this for potential pruning.
269 PruningList.insert(N);
270 }
271
272 /// Add to the worklist making sure its instance is at the back (next to be
273 /// processed.)
274 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
275 bool SkipIfCombinedBefore = false) {
276 assert(N->getOpcode() != ISD::DELETED_NODE &&
277 "Deleted Node added to Worklist");
278
279 // Skip handle nodes as they can't usefully be combined and confuse the
280 // zero-use deletion strategy.
281 if (N->getOpcode() == ISD::HANDLENODE)
282 return;
283
284 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
285 return;
286
287 if (IsCandidateForPruning)
288 ConsiderForPruning(N);
289
290 if (N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.size());
292 Worklist.push_back(N);
293 }
294 }
295
296 /// Remove all instances of N from the worklist.
297 void removeFromWorklist(SDNode *N) {
298 PruningList.remove(N);
299 StoreRootCountMap.erase(N);
300
301 int WorklistIndex = N->getCombinerWorklistIndex();
302 // If not in the worklist, the index might be -1 or -2 (was combined
303 // before). As the node gets deleted anyway, there's no need to update
304 // the index.
305 if (WorklistIndex < 0)
306 return; // Not in the worklist.
307
308 // Null out the entry rather than erasing it to avoid a linear operation.
309 Worklist[WorklistIndex] = nullptr;
310 N->setCombinerWorklistIndex(-1);
311 }
312
313 void deleteAndRecombine(SDNode *N);
314 bool recursivelyDeleteUnusedNodes(SDNode *N);
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
318 bool AddTo = true);
319
320 /// Replaces all uses of the results of one DAG node with new values.
321 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
322 return CombineTo(N, &Res, 1, AddTo);
323 }
324
325 /// Replaces all uses of the results of one DAG node with new values.
326 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
327 bool AddTo = true) {
328 SDValue To[] = { Res0, Res1 };
329 return CombineTo(N, To, 2, AddTo);
330 }
331
332 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
333
334 private:
335 unsigned MaximumLegalStoreInBits;
336
337 /// Check the specified integer node value to see if it can be simplified or
338 /// if things it uses can be simplified by bit propagation.
339 /// If so, return true.
340 bool SimplifyDemandedBits(SDValue Op) {
341 unsigned BitWidth = Op.getScalarValueSizeInBits();
342      APInt DemandedBits = APInt::getAllOnes(BitWidth);
343      return SimplifyDemandedBits(Op, DemandedBits);
344 }
345
346 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
347 EVT VT = Op.getValueType();
348 APInt DemandedElts = VT.isFixedLengthVector()
349                                  ? APInt::getAllOnes(VT.getVectorNumElements())
350                                  : APInt(1, 1);
351 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
352 }
353
354 /// Check the specified vector node value to see if it can be simplified or
355 /// if things it uses can be simplified as it only uses some of the
356 /// elements. If so, return true.
357 bool SimplifyDemandedVectorElts(SDValue Op) {
358 // TODO: For now just pretend it cannot be simplified.
359 if (Op.getValueType().isScalableVector())
360 return false;
361
362 unsigned NumElts = Op.getValueType().getVectorNumElements();
363 APInt DemandedElts = APInt::getAllOnes(NumElts);
364 return SimplifyDemandedVectorElts(Op, DemandedElts);
365 }
366
367 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
368 const APInt &DemandedElts,
369 bool AssumeSingleUse = false);
370 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
371 bool AssumeSingleUse = false);
372
373 bool CombineToPreIndexedLoadStore(SDNode *N);
374 bool CombineToPostIndexedLoadStore(SDNode *N);
375 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
376 bool SliceUpLoad(SDNode *N);
377
378 // Looks up the chain to find a unique (unaliased) store feeding the passed
379 // load. If no such store is found, returns a nullptr.
380    // Note: This will look past a CALLSEQ_START if the load is chained to it,
381    // so that it can find stack stores for byval params.
382 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
383 // Scalars have size 0 to distinguish from singleton vectors.
384 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
385 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
386 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
387
388 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
389 /// load.
390 ///
391 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
392 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
393 /// \param EltNo index of the vector element to load.
394 /// \param OriginalLoad load that EVE came from to be replaced.
395    /// \returns EVE on success, SDValue() on failure.
396 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
397 SDValue EltNo,
398 LoadSDNode *OriginalLoad);
399 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
400 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
401 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
402 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
403 SDValue PromoteIntBinOp(SDValue Op);
404 SDValue PromoteIntShiftOp(SDValue Op);
405 SDValue PromoteExtend(SDValue Op);
406 bool PromoteLoad(SDValue Op);
407
408 SDValue foldShiftToAvg(SDNode *N);
409
410 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
411 SDValue RHS, SDValue True, SDValue False,
412                                ISD::CondCode CC);
413
414 /// Call the node-specific routine that knows how to fold each
415 /// particular type of node. If that doesn't do anything, try the
416 /// target-specific DAG combines.
417 SDValue combine(SDNode *N);
418
419 // Visitation implementation - Implement dag node combining for different
420 // node types. The semantics are as follows:
421 // Return Value:
422 // SDValue.getNode() == 0 - No change was made
423 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
424 // otherwise - N should be replaced by the returned Operand.
425 //
426 SDValue visitTokenFactor(SDNode *N);
427 SDValue visitMERGE_VALUES(SDNode *N);
428 SDValue visitADD(SDNode *N);
429 SDValue visitADDLike(SDNode *N);
430 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
550 SDValue visitVECTOR_SHUFFLE(SDNode *N);
551 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
552 SDValue visitINSERT_SUBVECTOR(SDNode *N);
553 SDValue visitVECTOR_COMPRESS(SDNode *N);
554 SDValue visitMLOAD(SDNode *N);
555 SDValue visitMSTORE(SDNode *N);
556 SDValue visitMGATHER(SDNode *N);
557 SDValue visitMSCATTER(SDNode *N);
558 SDValue visitMHISTOGRAM(SDNode *N);
559 SDValue visitVPGATHER(SDNode *N);
560 SDValue visitVPSCATTER(SDNode *N);
561 SDValue visitVP_STRIDED_LOAD(SDNode *N);
562 SDValue visitVP_STRIDED_STORE(SDNode *N);
563 SDValue visitFP_TO_FP16(SDNode *N);
564 SDValue visitFP16_TO_FP(SDNode *N);
565 SDValue visitFP_TO_BF16(SDNode *N);
566 SDValue visitBF16_TO_FP(SDNode *N);
567 SDValue visitVECREDUCE(SDNode *N);
568 SDValue visitVPOp(SDNode *N);
569 SDValue visitGET_FPENV_MEM(SDNode *N);
570 SDValue visitSET_FPENV_MEM(SDNode *N);
571
572 template <class MatchContextClass>
573 SDValue visitFADDForFMACombine(SDNode *N);
574 template <class MatchContextClass>
575 SDValue visitFSUBForFMACombine(SDNode *N);
576 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
577
578 SDValue XformToShuffleWithZero(SDNode *N);
579 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
580 const SDLoc &DL,
581 SDNode *N,
582 SDValue N0,
583 SDValue N1);
584 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
585 SDValue N1, SDNodeFlags Flags);
586 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
589 EVT VT, SDValue N0, SDValue N1,
590 SDNodeFlags Flags = SDNodeFlags());
591
592 SDValue visitShiftByConstant(SDNode *N);
593
594 SDValue foldSelectOfConstants(SDNode *N);
595 SDValue foldVSelectOfConstants(SDNode *N);
596 SDValue foldBinOpIntoSelect(SDNode *BO);
597 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
598 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
599 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
600 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
601                              SDValue N2, SDValue N3, ISD::CondCode CC,
602                              bool NotExtCompare = false);
603 SDValue convertSelectOfFPConstantsToLoadOffset(
604 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
605        ISD::CondCode CC);
606    SDValue foldSignChangeInBitcast(SDNode *N);
607 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
608                                   SDValue N2, SDValue N3, ISD::CondCode CC);
609    SDValue foldSelectOfBinops(SDNode *N);
610 SDValue foldSextSetcc(SDNode *N);
611 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
612 const SDLoc &DL);
613 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
614 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
615 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
616 SDValue False, ISD::CondCode CC, const SDLoc &DL);
617 SDValue unfoldMaskedMerge(SDNode *N);
618 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
619 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
620 const SDLoc &DL, bool foldBooleans);
621 SDValue rebuildSetCC(SDValue N);
622
623 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
624 SDValue &CC, bool MatchStrict = false) const;
625 bool isOneUseSetCC(SDValue N) const;
626
627 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
628 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
629
630 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
631 unsigned HiOp);
632 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
633 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
634 const TargetLowering &TLI);
635
636 SDValue CombineExtLoad(SDNode *N);
637 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
638 SDValue combineRepeatedFPDivisors(SDNode *N);
639 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
640 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
641 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
642 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
643 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
644 SDValue BuildSDIV(SDNode *N);
645 SDValue BuildSDIVPow2(SDNode *N);
646 SDValue BuildUDIV(SDNode *N);
647 SDValue BuildSREMPow2(SDNode *N);
648 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
649 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
650 bool KnownNeverZero = false,
651 bool InexpensiveOnly = false,
652 std::optional<EVT> OutVT = std::nullopt);
653 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
654 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
655 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
656 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
657 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
658 SDNodeFlags Flags, bool Reciprocal);
659 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
660 SDNodeFlags Flags, bool Reciprocal);
661 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
662 bool DemandHighBits = true);
663 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
664 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
665 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
666 unsigned PosOpcode, unsigned NegOpcode,
667 const SDLoc &DL);
668 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
669 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
670 unsigned PosOpcode, unsigned NegOpcode,
671 const SDLoc &DL);
672 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
673 SDValue MatchLoadCombine(SDNode *N);
674 SDValue mergeTruncStores(StoreSDNode *N);
675 SDValue reduceLoadWidth(SDNode *N);
676 SDValue ReduceLoadOpStoreWidth(SDNode *N);
678 SDValue TransformFPLoadStorePair(SDNode *N);
679 SDValue convertBuildVecZextToZext(SDNode *N);
680 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
681 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
682 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
683 SDValue reduceBuildVecToShuffle(SDNode *N);
684 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
685 ArrayRef<int> VectorMask, SDValue VecIn1,
686 SDValue VecIn2, unsigned LeftIdx,
687 bool DidSplitVec);
688 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
689
690 /// Walk up chain skipping non-aliasing memory nodes,
691 /// looking for aliasing nodes and adding them to the Aliases vector.
692 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
693 SmallVectorImpl<SDValue> &Aliases);
694
695 /// Return true if there is any possibility that the two addresses overlap.
696 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
697
698 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
699 /// chain (aliasing node.)
700 SDValue FindBetterChain(SDNode *N, SDValue Chain);
701
702 /// Try to replace a store and any possibly adjacent stores on
703 /// consecutive chains with better chains. Return true only if St is
704 /// replaced.
705 ///
706 /// Notice that other chains may still be replaced even if the function
707 /// returns false.
708 bool findBetterNeighborChains(StoreSDNode *St);
709
710    // Helper for findBetterNeighborChains. Walks up the store chain, adding
711    // additional chained stores that do not overlap and can be parallelized.
712 bool parallelizeChainedStores(StoreSDNode *St);
713
714 /// Holds a pointer to an LSBaseSDNode as well as information on where it
715 /// is located in a sequence of memory operations connected by a chain.
716 struct MemOpLink {
717 // Ptr to the mem node.
718 LSBaseSDNode *MemNode;
719
720 // Offset from the base ptr.
721 int64_t OffsetFromBase;
722
723 MemOpLink(LSBaseSDNode *N, int64_t Offset)
724 : MemNode(N), OffsetFromBase(Offset) {}
725 };
726
727 // Classify the origin of a stored value.
728 enum class StoreSource { Unknown, Constant, Extract, Load };
729 StoreSource getStoreSource(SDValue StoreVal) {
730 switch (StoreVal.getOpcode()) {
731 case ISD::Constant:
732 case ISD::ConstantFP:
733 return StoreSource::Constant;
734      case ISD::BUILD_VECTOR:
735        if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
736            ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
737          return StoreSource::Constant;
738        return StoreSource::Unknown;
739      case ISD::EXTRACT_VECTOR_ELT:
740      case ISD::EXTRACT_SUBVECTOR:
741        return StoreSource::Extract;
742 case ISD::LOAD:
743 return StoreSource::Load;
744 default:
745 return StoreSource::Unknown;
746 }
747 }
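    // For example, a run of scalar stores whose values all come from
    // extract_vector_elt nodes is classified as StoreSource::Extract, which
    // lets tryStoreMergeOfExtracts below turn them into a single vector store
    // when that is legal for the target.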
748
749 /// This is a helper function for visitMUL to check the profitability
750 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
751 /// MulNode is the original multiply, AddNode is (add x, c1),
752 /// and ConstNode is c2.
753 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
754 SDValue ConstNode);
755
756 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
757 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
758 /// the type of the loaded value to be extended.
759 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
760 EVT LoadResultTy, EVT &ExtVT);
761
762 /// Helper function to calculate whether the given Load/Store can have its
763 /// width reduced to ExtVT.
764 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
765 EVT &MemVT, unsigned ShAmt = 0);
766
767 /// Used by BackwardsPropagateMask to find suitable loads.
768 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
769 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
770 ConstantSDNode *Mask, SDNode *&NodeToMask);
771 /// Attempt to propagate a given AND node back to load leaves so that they
772 /// can be combined into narrow loads.
773 bool BackwardsPropagateMask(SDNode *N);
774
775 /// Helper function for mergeConsecutiveStores which merges the component
776 /// store chains.
777 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
778 unsigned NumStores);
779
780 /// Helper function for mergeConsecutiveStores which checks if all the store
781 /// nodes have the same underlying object. We can still reuse the first
782 /// store's pointer info if all the stores are from the same object.
783 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
784
785 /// This is a helper function for mergeConsecutiveStores. When the source
786 /// elements of the consecutive stores are all constants or all extracted
787 /// vector elements, try to merge them into one larger store introducing
788 /// bitcasts if necessary. \return True if a merged store was created.
789 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
790 EVT MemVT, unsigned NumStores,
791 bool IsConstantSrc, bool UseVector,
792 bool UseTrunc);
793
794 /// This is a helper function for mergeConsecutiveStores. Stores that
795 /// potentially may be merged with St are placed in StoreNodes. On success,
796 /// returns a chain predecessor to all store candidates.
797 SDNode *getStoreMergeCandidates(StoreSDNode *St,
798 SmallVectorImpl<MemOpLink> &StoreNodes);
799
800 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
801 /// have indirect dependency through their operands. RootNode is the
802 /// predecessor to all stores calculated by getStoreMergeCandidates and is
803 /// used to prune the dependency check. \return True if safe to merge.
804 bool checkMergeStoreCandidatesForDependencies(
805 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
806 SDNode *RootNode);
807
808 /// This is a helper function for mergeConsecutiveStores. Given a list of
809 /// store candidates, find the first N that are consecutive in memory.
810 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
811 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
812 int64_t ElementSizeBytes) const;
813
814 /// This is a helper function for mergeConsecutiveStores. It is used for
815 /// store chains that are composed entirely of constant values.
816 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
817 unsigned NumConsecutiveStores,
818 EVT MemVT, SDNode *Root, bool AllowVectors);
819
820 /// This is a helper function for mergeConsecutiveStores. It is used for
821 /// store chains that are composed entirely of extracted vector elements.
822 /// When extracting multiple vector elements, try to store them in one
823 /// vector store rather than a sequence of scalar stores.
824 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
825 unsigned NumConsecutiveStores, EVT MemVT,
826 SDNode *Root);
827
828 /// This is a helper function for mergeConsecutiveStores. It is used for
829 /// store chains that are composed entirely of loaded values.
830 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
831 unsigned NumConsecutiveStores, EVT MemVT,
832 SDNode *Root, bool AllowVectors,
833 bool IsNonTemporalStore, bool IsNonTemporalLoad);
834
835 /// Merge consecutive store operations into a wide store.
836 /// This optimization uses wide integers or vectors when possible.
837 /// \return true if stores were merged.
838 bool mergeConsecutiveStores(StoreSDNode *St);
839
840 /// Try to transform a truncation where C is a constant:
841 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
842 ///
843 /// \p N needs to be a truncation and its first operand an AND. Other
844 /// requirements are checked by the function (e.g. that trunc is
845 /// single-use) and if missed an empty SDValue is returned.
846 SDValue distributeTruncateThroughAnd(SDNode *N);
847
848 /// Helper function to determine whether the target supports operation
849 /// given by \p Opcode for type \p VT, that is, whether the operation
850    /// is legal or custom before legalizing operations, and whether it is
851 /// legal (but not custom) after legalization.
852 bool hasOperation(unsigned Opcode, EVT VT) {
853 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
854 }
855
856 public:
857 /// Runs the dag combiner on all nodes in the work list
858 void Run(CombineLevel AtLevel);
859
860 SelectionDAG &getDAG() const { return DAG; }
861
862 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
863 EVT getShiftAmountTy(EVT LHSTy) {
864 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
865 }
866
867 /// This method returns true if we are running before type legalization or
868 /// if the specified VT is legal.
869 bool isTypeLegal(const EVT &VT) {
870 if (!LegalTypes) return true;
871 return TLI.isTypeLegal(VT);
872 }
873
874 /// Convenience wrapper around TargetLowering::getSetCCResultType
875 EVT getSetCCResultType(EVT VT) const {
876 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
877 }
878
879 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
880 SDValue OrigLoad, SDValue ExtLoad,
881 ISD::NodeType ExtType);
882 };
883
884/// This class is a DAGUpdateListener that removes any deleted
885/// nodes from the worklist.
886class WorklistRemover : public SelectionDAG::DAGUpdateListener {
887 DAGCombiner &DC;
888
889public:
890 explicit WorklistRemover(DAGCombiner &dc)
891 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
892
893 void NodeDeleted(SDNode *N, SDNode *E) override {
894 DC.removeFromWorklist(N);
895 }
896};
897
898class WorklistInserter : public SelectionDAG::DAGUpdateListener {
899 DAGCombiner &DC;
900
901public:
902 explicit WorklistInserter(DAGCombiner &dc)
903 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
904
905 // FIXME: Ideally we could add N to the worklist, but this causes exponential
906 // compile time costs in large DAGs, e.g. Halide.
907 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
908};
909
910} // end anonymous namespace
911
912//===----------------------------------------------------------------------===//
913// TargetLowering::DAGCombinerInfo implementation
914//===----------------------------------------------------------------------===//
915
916void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
917  ((DAGCombiner*)DC)->AddToWorklist(N);
918}
919
920SDValue TargetLowering::DAGCombinerInfo::
921CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
922 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
923}
924
925SDValue TargetLowering::DAGCombinerInfo::
926CombineTo(SDNode *N, SDValue Res, bool AddTo) {
927 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
928}
929
930SDValue TargetLowering::DAGCombinerInfo::
931CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
932 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
933}
934
935bool TargetLowering::DAGCombinerInfo::
936recursivelyDeleteUnusedNodes(SDNode *N) {
937  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
938}
939
940void TargetLowering::DAGCombinerInfo::
941CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
942  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
943}
944
945//===----------------------------------------------------------------------===//
946// Helper Functions
947//===----------------------------------------------------------------------===//
948
949void DAGCombiner::deleteAndRecombine(SDNode *N) {
950 removeFromWorklist(N);
951
952 // If the operands of this node are only used by the node, they will now be
953 // dead. Make sure to re-visit them and recursively delete dead nodes.
954 for (const SDValue &Op : N->ops())
955 // For an operand generating multiple values, one of the values may
956 // become dead allowing further simplification (e.g. split index
957 // arithmetic from an indexed load).
958 if (Op->hasOneUse() || Op->getNumValues() > 1)
959 AddToWorklist(Op.getNode());
960
961 DAG.DeleteNode(N);
962}
963
964// APInts must be the same size for most operations; this helper
965// function zero extends the shorter of the pair so that they match.
966// We provide an Offset so that we can create bitwidths that won't overflow.
967static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
968 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
969 LHS = LHS.zext(Bits);
970 RHS = RHS.zext(Bits);
971}
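// For example, with an 8-bit LHS, a 16-bit RHS and Offset == 1, both values
// are zero extended to 17 bits, leaving one bit of headroom so that a
// subsequent add or shift on the pair cannot overflow.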
972
973// Return true if this node is a setcc, or is a select_cc
974// that selects between the target values used for true and false, making it
975// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
976// the appropriate nodes based on the type of node we are checking. This
977// simplifies life a bit for the callers.
978bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
979 SDValue &CC, bool MatchStrict) const {
980 if (N.getOpcode() == ISD::SETCC) {
981 LHS = N.getOperand(0);
982 RHS = N.getOperand(1);
983 CC = N.getOperand(2);
984 return true;
985 }
986
987 if (MatchStrict &&
988 (N.getOpcode() == ISD::STRICT_FSETCC ||
989 N.getOpcode() == ISD::STRICT_FSETCCS)) {
990 LHS = N.getOperand(1);
991 RHS = N.getOperand(2);
992 CC = N.getOperand(3);
993 return true;
994 }
995
996 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
997 !TLI.isConstFalseVal(N.getOperand(3)))
998 return false;
999
1000 if (TLI.getBooleanContents(N.getValueType()) ==
1001          TargetLowering::UndefinedBooleanContent)
1002    return false;
1003
1004 LHS = N.getOperand(0);
1005 RHS = N.getOperand(1);
1006 CC = N.getOperand(4);
1007 return true;
1008}
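// For example, (select_cc lhs, rhs, TrueVal, FalseVal, cc) where TrueVal and
// FalseVal are the target's canonical boolean true/false constants behaves
// exactly like (setcc lhs, rhs, cc), so callers can handle both forms
// uniformly through the LHS/RHS/CC out-parameters filled in above.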
1009
1010/// Return true if this is a SetCC-equivalent operation with only one use.
1011/// If this is true, it allows the users to invert the operation for free when
1012/// it is profitable to do so.
1013bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1014 SDValue N0, N1, N2;
1015 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1016 return true;
1017 return false;
1018}
1019
1020static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1021  if (!ScalarTy.isSimple())
1022 return false;
1023
1024 uint64_t MaskForTy = 0ULL;
1025 switch (ScalarTy.getSimpleVT().SimpleTy) {
1026 case MVT::i8:
1027 MaskForTy = 0xFFULL;
1028 break;
1029 case MVT::i16:
1030 MaskForTy = 0xFFFFULL;
1031 break;
1032 case MVT::i32:
1033 MaskForTy = 0xFFFFFFFFULL;
1034 break;
1035 default:
1036 return false;
1037 break;
1038 }
1039
1040 APInt Val;
1041 if (ISD::isConstantSplatVector(N, Val))
1042 return Val.getLimitedValue() == MaskForTy;
1043
1044 return false;
1045}
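// For example, a v4i32 splat of the constant 0xFFFF matches when ScalarTy is
// i16: the splat is exactly the all-ones mask for the narrower element type.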
1046
1047// Determines if it is a constant integer or a splat/build vector of constant
1048// integers (and undefs).
1049// Do not permit build vector implicit truncation.
1050static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1051 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1052 return !(Const->isOpaque() && NoOpaques);
1053 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1054 return false;
1055 unsigned BitWidth = N.getScalarValueSizeInBits();
1056 for (const SDValue &Op : N->op_values()) {
1057 if (Op.isUndef())
1058 continue;
1059 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1060 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1061 (Const->isOpaque() && NoOpaques))
1062 return false;
1063 }
1064 return true;
1065}
1066
1067// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1068// with undefs.
1069static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1070 if (V.getOpcode() != ISD::BUILD_VECTOR)
1071 return false;
1072 return isConstantOrConstantVector(V, NoOpaques) ||
1073         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1074}
1075
1076// Determine if this is an indexed load with an opaque target constant index.
1077static bool canSplitIdx(LoadSDNode *LD) {
1078 return MaySplitLoadIndex &&
1079 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1080 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1081}
1082
1083bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1084 const SDLoc &DL,
1085 SDNode *N,
1086 SDValue N0,
1087 SDValue N1) {
1088 // Currently this only tries to ensure we don't undo the GEP splits done by
1089 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1090 // we check if the following transformation would be problematic:
1091 // (load/store (add, (add, x, offset1), offset2)) ->
1092 // (load/store (add, x, offset1+offset2)).
1093
1094 // (load/store (add, (add, x, y), offset2)) ->
1095 // (load/store (add, (add, x, offset2), y)).
1096
1097 if (N0.getOpcode() != ISD::ADD)
1098 return false;
1099
1100 // Check for vscale addressing modes.
1101 // (load/store (add/sub (add x, y), vscale))
1102 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1103 // (load/store (add/sub (add x, y), (mul vscale, C)))
1104 if ((N1.getOpcode() == ISD::VSCALE ||
1105 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1106 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1107 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1108 N1.getValueType().getFixedSizeInBits() <= 64) {
1109 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1110 ? N1.getConstantOperandVal(0)
1111 : (N1.getOperand(0).getConstantOperandVal(0) *
1112 (N1.getOpcode() == ISD::SHL
1113 ? (1LL << N1.getConstantOperandVal(1))
1114 : N1.getConstantOperandVal(1)));
1115 if (Opc == ISD::SUB)
1116 ScalableOffset = -ScalableOffset;
1117 if (all_of(N->users(), [&](SDNode *Node) {
1118 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1119 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1120          TargetLoweringBase::AddrMode AM;
1121          AM.HasBaseReg = true;
1122 AM.ScalableOffset = ScalableOffset;
1123 EVT VT = LoadStore->getMemoryVT();
1124 unsigned AS = LoadStore->getAddressSpace();
1125 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1126 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1127 AS);
1128 }
1129 return false;
1130 }))
1131 return true;
1132 }
1133
1134 if (Opc != ISD::ADD)
1135 return false;
1136
1137 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1138 if (!C2)
1139 return false;
1140
1141 const APInt &C2APIntVal = C2->getAPIntValue();
1142 if (C2APIntVal.getSignificantBits() > 64)
1143 return false;
1144
1145 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1146 if (N0.hasOneUse())
1147 return false;
1148
1149 const APInt &C1APIntVal = C1->getAPIntValue();
1150 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1151 if (CombinedValueIntVal.getSignificantBits() > 64)
1152 return false;
1153 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1154
1155 for (SDNode *Node : N->users()) {
1156 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1157 // Is x[offset2] already not a legal addressing mode? If so then
1158 // reassociating the constants breaks nothing (we test offset2 because
1159 // that's the one we hope to fold into the load or store).
1160        TargetLoweringBase::AddrMode AM;
1161        AM.HasBaseReg = true;
1162 AM.BaseOffs = C2APIntVal.getSExtValue();
1163 EVT VT = LoadStore->getMemoryVT();
1164 unsigned AS = LoadStore->getAddressSpace();
1165 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1166 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1167 continue;
1168
1169 // Would x[offset1+offset2] still be a legal addressing mode?
1170 AM.BaseOffs = CombinedValue;
1171 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1172 return true;
1173 }
1174 }
1175 } else {
1176 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1177 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1178 return false;
1179
1180 for (SDNode *Node : N->users()) {
1181 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1182 if (!LoadStore)
1183 return false;
1184
1185      // Is x[offset2] a legal addressing mode? If so, then reassociating
1186      // the constants breaks the addressing mode pattern.
1187      TargetLoweringBase::AddrMode AM;
1188      AM.HasBaseReg = true;
1189 AM.BaseOffs = C2APIntVal.getSExtValue();
1190 EVT VT = LoadStore->getMemoryVT();
1191 unsigned AS = LoadStore->getAddressSpace();
1192 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1193 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1194 return false;
1195 }
1196 return true;
1197 }
1198
1199 return false;
1200}
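// Illustration of the check above: if CodeGenPrepare split a large offset so
// that x[offset2] is a legal addressing mode but x[offset1 + offset2] is not,
// folding the two adds back together would undo that split and force the
// combined offset to be materialized separately, so the reassociation is
// reported as harmful.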
1201
1202/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1203/// \p N0 is the same kind of operation as \p Opc.
1204SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1205 SDValue N0, SDValue N1,
1206 SDNodeFlags Flags) {
1207 EVT VT = N0.getValueType();
1208
1209 if (N0.getOpcode() != Opc)
1210 return SDValue();
1211
1212 SDValue N00 = N0.getOperand(0);
1213 SDValue N01 = N0.getOperand(1);
1214
1215  if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1216    SDNodeFlags NewFlags;
1217 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1218 Flags.hasNoUnsignedWrap())
1219 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1220
1221    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1222      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1223 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1224 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1225 N0->getFlags().hasDisjoint());
1226 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1227 }
1228 return SDValue();
1229 }
1230 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1231 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1232 // iff (op x, c1) has one use
1233 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1234 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1235 }
1236 }
1237
1238 // Check for repeated operand logic simplifications.
1239 if (Opc == ISD::AND || Opc == ISD::OR) {
1240 // (N00 & N01) & N00 --> N00 & N01
1241 // (N00 & N01) & N01 --> N00 & N01
1242 // (N00 | N01) | N00 --> N00 | N01
1243 // (N00 | N01) | N01 --> N00 | N01
1244 if (N1 == N00 || N1 == N01)
1245 return N0;
1246 }
1247 if (Opc == ISD::XOR) {
1248 // (N00 ^ N01) ^ N00 --> N01
1249 if (N1 == N00)
1250 return N01;
1251 // (N00 ^ N01) ^ N01 --> N00
1252 if (N1 == N01)
1253 return N00;
1254 }
1255
1256 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1257 if (N1 != N01) {
1258      // Reassociate if (op N00, N1) already exists.
1259      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1260        // If Op (Op N00, N1), N01 already exists,
1261        // we need to stop reassociating to avoid an infinite loop.
1262 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1263 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1264 }
1265 }
1266
1267 if (N1 != N00) {
1268      // Reassociate if (op N01, N1) already exists.
1269      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1270        // If Op (Op N01, N1), N00 already exists,
1271        // we need to stop reassociating to avoid an infinite loop.
1272 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1273 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1274 }
1275 }
1276
1277      // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1278 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1279 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1280 // comparisons with the same predicate. This enables optimizations as the
1281 // following one:
1282 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1283 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1284 if (Opc == ISD::AND || Opc == ISD::OR) {
1285 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1286 N01->getOpcode() == ISD::SETCC) {
1287 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1288 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1289 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1290 if (CC1 == CC00 && CC1 != CC01) {
1291 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1292 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1293 }
1294 if (CC1 == CC01 && CC1 != CC00) {
1295 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1296 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1297 }
1298 }
1299 }
1300 }
1301
1302 return SDValue();
1303}
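// Example of the folds above: (add (add x, 1), 2) is folded to (add x, 3) via
// FoldConstantArithmetic, and (add (add x, c1), y) becomes (add (add x, y), c1)
// when the target reports the reassociation as profitable.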
1304
1305/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1306/// same kind of operation as \p Opc.
1307SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1308 SDValue N1, SDNodeFlags Flags) {
1309 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1310
1311 // Floating-point reassociation is not allowed without loose FP math.
1312 if (N0.getValueType().isFloatingPoint() ||
1313      N1.getValueType().isFloatingPoint())
1314    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1315 return SDValue();
1316
1317 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1318 return Combined;
1319 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1320 return Combined;
1321 return SDValue();
1322}
1323
1324// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1325// Note that we only expect Flags to be passed from FP operations. For integer
1326// operations they need to be dropped.
1327SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1328 const SDLoc &DL, EVT VT, SDValue N0,
1329 SDValue N1, SDNodeFlags Flags) {
1330 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1331 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1332 N0->hasOneUse() && N1->hasOneUse() &&
1334 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1335 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1336 return DAG.getNode(RedOpc, DL, VT,
1337 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1338 N0.getOperand(0), N1.getOperand(0)));
1339 }
1340 return SDValue();
1341}
1342
1343SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1344 bool AddTo) {
1345 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1346 ++NodesCombined;
1347 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1348 To[0].dump(&DAG);
1349 dbgs() << " and " << NumTo - 1 << " other values\n");
1350 for (unsigned i = 0, e = NumTo; i != e; ++i)
1351 assert((!To[i].getNode() ||
1352 N->getValueType(i) == To[i].getValueType()) &&
1353 "Cannot combine value to value of different type!");
1354
1355 WorklistRemover DeadNodes(*this);
1356 DAG.ReplaceAllUsesWith(N, To);
1357 if (AddTo) {
1358 // Push the new nodes and any users onto the worklist
1359 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1360 if (To[i].getNode())
1361 AddToWorklistWithUsers(To[i].getNode());
1362 }
1363 }
1364
1365 // Finally, if the node is now dead, remove it from the graph. The node
1366 // may not be dead if the replacement process recursively simplified to
1367 // something else needing this node.
1368 if (N->use_empty())
1369 deleteAndRecombine(N);
1370 return SDValue(N, 0);
1371}
1372
1373void DAGCombiner::
1374CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1375 // Replace the old value with the new one.
1376 ++NodesCombined;
1377 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1378 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1379
1380 // Replace all uses.
1381 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1382
1383 // Push the new node and any (possibly new) users onto the worklist.
1384 AddToWorklistWithUsers(TLO.New.getNode());
1385
1386 // Finally, if the node is now dead, remove it from the graph.
1387 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1388}
1389
1390/// Check the specified integer node value to see if it can be simplified or if
1391/// things it uses can be simplified by bit propagation. If so, return true.
1392bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1393 const APInt &DemandedElts,
1394 bool AssumeSingleUse) {
1395 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1396 KnownBits Known;
1397 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1398 AssumeSingleUse))
1399 return false;
1400
1401 // Revisit the node.
1402 AddToWorklist(Op.getNode());
1403
1404 CommitTargetLoweringOpt(TLO);
1405 return true;
1406}
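// For example, if the users of an AND node only demand its low 8 bits,
// SimplifyDemandedBits may shrink the mask constant or remove the AND
// entirely, and the node is re-queued on the worklist for further combining.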
1407
1408/// Check the specified vector node value to see if it can be simplified or
1409/// if things it uses can be simplified as it only uses some of the elements.
1410/// If so, return true.
1411bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1412 const APInt &DemandedElts,
1413 bool AssumeSingleUse) {
1414 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1415 APInt KnownUndef, KnownZero;
1416 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1417 TLO, 0, AssumeSingleUse))
1418 return false;
1419
1420 // Revisit the node.
1421 AddToWorklist(Op.getNode());
1422
1423 CommitTargetLoweringOpt(TLO);
1424 return true;
1425}
1426
1427void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1428 SDLoc DL(Load);
1429 EVT VT = Load->getValueType(0);
1430 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1431
1432 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1433 Trunc.dump(&DAG); dbgs() << '\n');
1434
1435 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1436 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1437
1438 AddToWorklist(Trunc.getNode());
1439 recursivelyDeleteUnusedNodes(Load);
1440}
1441
1442SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1443 Replace = false;
1444 SDLoc DL(Op);
1445 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1446 LoadSDNode *LD = cast<LoadSDNode>(Op);
1447 EVT MemVT = LD->getMemoryVT();
1448    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1449                                                      : LD->getExtensionType();
1450 Replace = true;
1451 return DAG.getExtLoad(ExtType, DL, PVT,
1452 LD->getChain(), LD->getBasePtr(),
1453 MemVT, LD->getMemOperand());
1454 }
1455
1456 unsigned Opc = Op.getOpcode();
1457 switch (Opc) {
1458 default: break;
1459 case ISD::AssertSext:
1460 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1461 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1462 break;
1463 case ISD::AssertZext:
1464 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1465 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1466 break;
1467 case ISD::Constant: {
1468 unsigned ExtOpc =
1469 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1470 return DAG.getNode(ExtOpc, DL, PVT, Op);
1471 }
1472 }
1473
1474 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1475 return SDValue();
1476 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1477}
1478
1479SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1480  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1481    return SDValue();
1482 EVT OldVT = Op.getValueType();
1483 SDLoc DL(Op);
1484 bool Replace = false;
1485 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1486 if (!NewOp.getNode())
1487 return SDValue();
1488 AddToWorklist(NewOp.getNode());
1489
1490 if (Replace)
1491 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1492 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1493 DAG.getValueType(OldVT));
1494}
1495
1496SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1497 EVT OldVT = Op.getValueType();
1498 SDLoc DL(Op);
1499 bool Replace = false;
1500 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1501 if (!NewOp.getNode())
1502 return SDValue();
1503 AddToWorklist(NewOp.getNode());
1504
1505 if (Replace)
1506 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1507 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1508}
1509
1510/// Promote the specified integer binary operation if the target indicates it is
1511/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1512/// i32 since i16 instructions are longer.
1513SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1514 if (!LegalOperations)
1515 return SDValue();
1516
1517 EVT VT = Op.getValueType();
1518 if (VT.isVector() || !VT.isInteger())
1519 return SDValue();
1520
1521 // If operation type is 'undesirable', e.g. i16 on x86, consider
1522 // promoting it.
1523 unsigned Opc = Op.getOpcode();
1524 if (TLI.isTypeDesirableForOp(Opc, VT))
1525 return SDValue();
1526
1527 EVT PVT = VT;
1528 // Consult target whether it is a good idea to promote this operation and
1529 // what's the right type to promote it to.
1530 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1531 assert(PVT != VT && "Don't know what type to promote to!");
1532
1533 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1534
1535 bool Replace0 = false;
1536 SDValue N0 = Op.getOperand(0);
1537 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1538
1539 bool Replace1 = false;
1540 SDValue N1 = Op.getOperand(1);
1541 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1542 SDLoc DL(Op);
1543
1544 SDValue RV =
1545 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1546
1547 // We are always replacing N0/N1's use in N and only need additional
1548 // replacements if there are additional uses.
1549 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1550 // (SDValue) here because the node may reference multiple values
1551 // (for example, the chain value of a load node).
1552 Replace0 &= !N0->hasOneUse();
1553 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1554
1555 // Combine Op here so it is preserved past replacements.
1556 CombineTo(Op.getNode(), RV);
1557
1558 // If operands have a use ordering, make sure we deal with
1559 // predecessor first.
1560 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1561 std::swap(N0, N1);
1562 std::swap(NN0, NN1);
1563 }
1564
1565 if (Replace0) {
1566 AddToWorklist(NN0.getNode());
1567 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1568 }
1569 if (Replace1) {
1570 AddToWorklist(NN1.getNode());
1571 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1572 }
1573 return Op;
1574 }
1575 return SDValue();
1576}
1577
1578/// Promote the specified integer shift operation if the target indicates it is
1579/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1580/// i32 since i16 instructions are longer.
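/// The value being shifted is extended so the bits shifted in stay correct:
/// sign-extended for SRA, zero-extended for SRL, any-extended for SHL.
/// For example (illustrative): (i16 srl x, c)
///   -> (i16 trunc (i32 srl (i32 and (i32 any_extend x), 0xffff), c))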
1581SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1582 if (!LegalOperations)
1583 return SDValue();
1584
1585 EVT VT = Op.getValueType();
1586 if (VT.isVector() || !VT.isInteger())
1587 return SDValue();
1588
1589 // If operation type is 'undesirable', e.g. i16 on x86, consider
1590 // promoting it.
1591 unsigned Opc = Op.getOpcode();
1592 if (TLI.isTypeDesirableForOp(Opc, VT))
1593 return SDValue();
1594
1595 EVT PVT = VT;
1596 // Consult target whether it is a good idea to promote this operation and
1597 // what's the right type to promote it to.
1598 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1599 assert(PVT != VT && "Don't know what type to promote to!");
1600
1601 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602
1603 bool Replace = false;
1604 SDValue N0 = Op.getOperand(0);
1605 if (Opc == ISD::SRA)
1606 N0 = SExtPromoteOperand(N0, PVT);
1607 else if (Opc == ISD::SRL)
1608 N0 = ZExtPromoteOperand(N0, PVT);
1609 else
1610 N0 = PromoteOperand(N0, PVT, Replace);
1611
1612 if (!N0.getNode())
1613 return SDValue();
1614
1615 SDLoc DL(Op);
1616 SDValue N1 = Op.getOperand(1);
1617 SDValue RV =
1618 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1619
1620 if (Replace)
1621 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1622
1623 // Deal with Op being deleted.
1624 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1625 return RV;
1626 }
1627 return SDValue();
1628}
1629
1630SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1631 if (!LegalOperations)
1632 return SDValue();
1633
1634 EVT VT = Op.getValueType();
1635 if (VT.isVector() || !VT.isInteger())
1636 return SDValue();
1637
1638 // If operation type is 'undesirable', e.g. i16 on x86, consider
1639 // promoting it.
1640 unsigned Opc = Op.getOpcode();
1641 if (TLI.isTypeDesirableForOp(Opc, VT))
1642 return SDValue();
1643
1644 EVT PVT = VT;
1645 // Consult target whether it is a good idea to promote this operation and
1646 // what's the right type to promote it to.
1647 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1648 assert(PVT != VT && "Don't know what type to promote to!");
1649 // fold (aext (aext x)) -> (aext x)
1650 // fold (aext (zext x)) -> (zext x)
1651 // fold (aext (sext x)) -> (sext x)
1652 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1653 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1654 }
1655 return SDValue();
1656}
1657
1658bool DAGCombiner::PromoteLoad(SDValue Op) {
1659 if (!LegalOperations)
1660 return false;
1661
1662 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1663 return false;
1664
1665 EVT VT = Op.getValueType();
1666 if (VT.isVector() || !VT.isInteger())
1667 return false;
1668
1669 // If operation type is 'undesirable', e.g. i16 on x86, consider
1670 // promoting it.
1671 unsigned Opc = Op.getOpcode();
1672 if (TLI.isTypeDesirableForOp(Opc, VT))
1673 return false;
1674
1675 EVT PVT = VT;
1676 // Consult target whether it is a good idea to promote this operation and
1677 // what's the right type to promote it to.
1678 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1679 assert(PVT != VT && "Don't know what type to promote to!");
1680
1681 SDLoc DL(Op);
1682 SDNode *N = Op.getNode();
1683 LoadSDNode *LD = cast<LoadSDNode>(N);
1684 EVT MemVT = LD->getMemoryVT();
1685 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1686 : LD->getExtensionType();
1687 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1688 LD->getChain(), LD->getBasePtr(),
1689 MemVT, LD->getMemOperand());
1690 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1691
1692 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1693 Result.dump(&DAG); dbgs() << '\n');
1694
1694
1695 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1696 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1697
1698 AddToWorklist(Result.getNode());
1699 recursivelyDeleteUnusedNodes(N);
1700 return true;
1701 }
1702
1703 return false;
1704}
1705
1706/// Recursively delete a node which has no uses and any operands for
1707/// which it is the only use.
1708///
1709/// Note that this both deletes the nodes and removes them from the worklist.
1710/// It also adds any nodes that have had a user deleted to the worklist as they
1711/// may now have only one use and be subject to other combines.
1712bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1713 if (!N->use_empty())
1714 return false;
1715
1715
1716 SmallSetVector<SDNode *, 16> Nodes;
1717 Nodes.insert(N);
1718 do {
1719 N = Nodes.pop_back_val();
1720 if (!N)
1721 continue;
1722
1723 if (N->use_empty()) {
1724 for (const SDValue &ChildN : N->op_values())
1725 Nodes.insert(ChildN.getNode());
1726
1727 removeFromWorklist(N);
1728 DAG.DeleteNode(N);
1729 } else {
1730 AddToWorklist(N);
1731 }
1732 } while (!Nodes.empty());
1733 return true;
1734}
1735
1736//===----------------------------------------------------------------------===//
1737// Main DAG Combiner implementation
1738//===----------------------------------------------------------------------===//
1739
1740void DAGCombiner::Run(CombineLevel AtLevel) {
1741 // set the instance variables, so that the various visit routines may use it.
1742 Level = AtLevel;
1743 LegalDAG = Level >= AfterLegalizeDAG;
1744 LegalOperations = Level >= AfterLegalizeVectorOps;
1745 LegalTypes = Level >= AfterLegalizeTypes;
1746
1747 WorklistInserter AddNodes(*this);
1748
1749 // Add all the dag nodes to the worklist.
1750 //
1751 // Note: Not all nodes are added to PruningList here; this is because the only
1752 // nodes which can be deleted are those which have no uses and all other nodes
1753 // which would otherwise be added to the worklist by the first call to
1754 // getNextWorklistEntry are already present in it.
1755 for (SDNode &Node : DAG.allnodes())
1756 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1757
1758 // Create a dummy node (which is not added to allnodes) that adds a reference
1759 // to the root node, preventing it from being deleted, and tracking any
1760 // changes of the root.
1761 HandleSDNode Dummy(DAG.getRoot());
1762
1763 // While we have a valid worklist entry node, try to combine it.
1764 while (SDNode *N = getNextWorklistEntry()) {
1765 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1766 // N is deleted from the DAG, since they too may now be dead or may have a
1767 // reduced number of uses, allowing other xforms.
1768 if (recursivelyDeleteUnusedNodes(N))
1769 continue;
1770
1771 WorklistRemover DeadNodes(*this);
1772
1773 // If this combine is running after legalizing the DAG, re-legalize any
1774 // nodes pulled off the worklist.
1775 if (LegalDAG) {
1776 SmallSetVector<SDNode *, 16> UpdatedNodes;
1777 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1778
1779 for (SDNode *LN : UpdatedNodes)
1780 AddToWorklistWithUsers(LN);
1781
1782 if (!NIsValid)
1783 continue;
1784 }
1785
1786 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1787
1788 // Add any operands of the new node which have not yet been combined to the
1789 // worklist as well. getNextWorklistEntry flags nodes that have been
1790 // combined before. Because the worklist uniques things already, this won't
1791 // repeatedly process the same operand.
1792 for (const SDValue &ChildN : N->op_values())
1793 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1794 /*SkipIfCombinedBefore=*/true);
1795
1796 SDValue RV = combine(N);
1797
1798 if (!RV.getNode())
1799 continue;
1800
1801 ++NodesCombined;
1802
1803 // Invalidate cached info.
1804 ChainsWithoutMergeableStores.clear();
1805
1806 // If we get back the same node we passed in, rather than a new node or
1807 // zero, we know that the node must have defined multiple values and
1808 // CombineTo was used. Since CombineTo takes care of the worklist
1809 // mechanics for us, we have no work to do in this case.
1810 if (RV.getNode() == N)
1811 continue;
1812
1813 assert(N->getOpcode() != ISD::DELETED_NODE &&
1814 RV.getOpcode() != ISD::DELETED_NODE &&
1815 "Node was deleted but visit returned new node!");
1816
1817 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1818
1819 if (N->getNumValues() == RV->getNumValues())
1820 DAG.ReplaceAllUsesWith(N, RV.getNode());
1821 else {
1822 assert(N->getValueType(0) == RV.getValueType() &&
1823 N->getNumValues() == 1 && "Type mismatch");
1824 DAG.ReplaceAllUsesWith(N, &RV);
1825 }
1826
1827 // Push the new node and any users onto the worklist. Omit this if the
1828 // new node is the EntryToken (e.g. if a store managed to get optimized
1829 // out), because re-visiting the EntryToken and its users will not uncover
1830 // any additional opportunities, but there may be a large number of such
1831 // users, potentially causing compile time explosion.
1832 if (RV.getOpcode() != ISD::EntryToken)
1833 AddToWorklistWithUsers(RV.getNode());
1834
1835 // Finally, if the node is now dead, remove it from the graph. The node
1836 // may not be dead if the replacement process recursively simplified to
1837 // something else needing this node. This will also take care of adding any
1838 // operands which have lost a user to the worklist.
1839 recursivelyDeleteUnusedNodes(N);
1840 }
1841
1842 // If the root changed (e.g. it was a dead load), update the root.
1843 DAG.setRoot(Dummy.getValue());
1844 DAG.RemoveDeadNodes();
1845}
1846
1847SDValue DAGCombiner::visit(SDNode *N) {
1848 // clang-format off
1849 switch (N->getOpcode()) {
1850 default: break;
1851 case ISD::TokenFactor: return visitTokenFactor(N);
1852 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1853 case ISD::ADD: return visitADD(N);
1854 case ISD::SUB: return visitSUB(N);
1855 case ISD::SADDSAT:
1856 case ISD::UADDSAT: return visitADDSAT(N);
1857 case ISD::SSUBSAT:
1858 case ISD::USUBSAT: return visitSUBSAT(N);
1859 case ISD::ADDC: return visitADDC(N);
1860 case ISD::SADDO:
1861 case ISD::UADDO: return visitADDO(N);
1862 case ISD::SUBC: return visitSUBC(N);
1863 case ISD::SSUBO:
1864 case ISD::USUBO: return visitSUBO(N);
1865 case ISD::ADDE: return visitADDE(N);
1866 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1867 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1868 case ISD::SUBE: return visitSUBE(N);
1869 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1870 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1871 case ISD::SMULFIX:
1872 case ISD::SMULFIXSAT:
1873 case ISD::UMULFIX:
1874 case ISD::UMULFIXSAT: return visitMULFIX(N);
1875 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1876 case ISD::SDIV: return visitSDIV(N);
1877 case ISD::UDIV: return visitUDIV(N);
1878 case ISD::SREM:
1879 case ISD::UREM: return visitREM(N);
1880 case ISD::MULHU: return visitMULHU(N);
1881 case ISD::MULHS: return visitMULHS(N);
1882 case ISD::AVGFLOORS:
1883 case ISD::AVGFLOORU:
1884 case ISD::AVGCEILS:
1885 case ISD::AVGCEILU: return visitAVG(N);
1886 case ISD::ABDS:
1887 case ISD::ABDU: return visitABD(N);
1888 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1889 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1890 case ISD::SMULO:
1891 case ISD::UMULO: return visitMULO(N);
1892 case ISD::SMIN:
1893 case ISD::SMAX:
1894 case ISD::UMIN:
1895 case ISD::UMAX: return visitIMINMAX(N);
1896 case ISD::AND: return visitAND(N);
1897 case ISD::OR: return visitOR(N);
1898 case ISD::XOR: return visitXOR(N);
1899 case ISD::SHL: return visitSHL(N);
1900 case ISD::SRA: return visitSRA(N);
1901 case ISD::SRL: return visitSRL(N);
1902 case ISD::ROTR:
1903 case ISD::ROTL: return visitRotate(N);
1904 case ISD::FSHL:
1905 case ISD::FSHR: return visitFunnelShift(N);
1906 case ISD::SSHLSAT:
1907 case ISD::USHLSAT: return visitSHLSAT(N);
1908 case ISD::ABS: return visitABS(N);
1909 case ISD::BSWAP: return visitBSWAP(N);
1910 case ISD::BITREVERSE: return visitBITREVERSE(N);
1911 case ISD::CTLZ: return visitCTLZ(N);
1912 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1913 case ISD::CTTZ: return visitCTTZ(N);
1914 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1915 case ISD::CTPOP: return visitCTPOP(N);
1916 case ISD::SELECT: return visitSELECT(N);
1917 case ISD::VSELECT: return visitVSELECT(N);
1918 case ISD::SELECT_CC: return visitSELECT_CC(N);
1919 case ISD::SETCC: return visitSETCC(N);
1920 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1921 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1922 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1923 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1924 case ISD::AssertSext:
1925 case ISD::AssertZext: return visitAssertExt(N);
1926 case ISD::AssertAlign: return visitAssertAlign(N);
1927 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1928 case ISD::SIGN_EXTEND_VECTOR_INREG:
1929 case ISD::ZERO_EXTEND_VECTOR_INREG:
1930 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1931 case ISD::TRUNCATE: return visitTRUNCATE(N);
1932 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1933 case ISD::BITCAST: return visitBITCAST(N);
1934 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1935 case ISD::FADD: return visitFADD(N);
1936 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1937 case ISD::FSUB: return visitFSUB(N);
1938 case ISD::FMUL: return visitFMUL(N);
1939 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1940 case ISD::FMAD: return visitFMAD(N);
1941 case ISD::FDIV: return visitFDIV(N);
1942 case ISD::FREM: return visitFREM(N);
1943 case ISD::FSQRT: return visitFSQRT(N);
1944 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1945 case ISD::FPOW: return visitFPOW(N);
1946 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1947 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1948 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1949 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1950 case ISD::LROUND:
1951 case ISD::LLROUND:
1952 case ISD::LRINT:
1953 case ISD::LLRINT: return visitXROUND(N);
1954 case ISD::FP_ROUND: return visitFP_ROUND(N);
1955 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1956 case ISD::FNEG: return visitFNEG(N);
1957 case ISD::FABS: return visitFABS(N);
1958 case ISD::FFLOOR: return visitFFLOOR(N);
1959 case ISD::FMINNUM:
1960 case ISD::FMAXNUM:
1961 case ISD::FMINIMUM:
1962 case ISD::FMAXIMUM:
1963 case ISD::FMINIMUMNUM:
1964 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
1965 case ISD::FCEIL: return visitFCEIL(N);
1966 case ISD::FTRUNC: return visitFTRUNC(N);
1967 case ISD::FFREXP: return visitFFREXP(N);
1968 case ISD::BRCOND: return visitBRCOND(N);
1969 case ISD::BR_CC: return visitBR_CC(N);
1970 case ISD::LOAD: return visitLOAD(N);
1971 case ISD::STORE: return visitSTORE(N);
1972 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1973 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1974 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1975 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1976 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1977 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1978 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1979 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1980 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1981 case ISD::MGATHER: return visitMGATHER(N);
1982 case ISD::MLOAD: return visitMLOAD(N);
1983 case ISD::MSCATTER: return visitMSCATTER(N);
1984 case ISD::MSTORE: return visitMSTORE(N);
1985 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1986 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1987 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1988 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1989 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1990 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1991 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1992 case ISD::FREEZE: return visitFREEZE(N);
1993 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1994 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1995 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
1996 case ISD::VECREDUCE_FADD:
1997 case ISD::VECREDUCE_FMUL:
1998 case ISD::VECREDUCE_ADD:
1999 case ISD::VECREDUCE_MUL:
2000 case ISD::VECREDUCE_AND:
2001 case ISD::VECREDUCE_OR:
2002 case ISD::VECREDUCE_XOR:
2003 case ISD::VECREDUCE_SMAX:
2004 case ISD::VECREDUCE_SMIN:
2005 case ISD::VECREDUCE_UMAX:
2006 case ISD::VECREDUCE_UMIN:
2007 case ISD::VECREDUCE_FMAX:
2008 case ISD::VECREDUCE_FMIN:
2009 case ISD::VECREDUCE_FMAXIMUM:
2010 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2011#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2012#include "llvm/IR/VPIntrinsics.def"
2013 return visitVPOp(N);
2014 }
2015 // clang-format on
2016 return SDValue();
2017}
2018
2019SDValue DAGCombiner::combine(SDNode *N) {
2020 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2021 return SDValue();
2022
2023 SDValue RV;
2024 if (!DisableGenericCombines)
2025 RV = visit(N);
2026
2027 // If nothing happened, try a target-specific DAG combine.
2028 if (!RV.getNode()) {
2029 assert(N->getOpcode() != ISD::DELETED_NODE &&
2030 "Node was deleted but visit returned NULL!");
2031
2032 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2033 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2034
2035 // Expose the DAG combiner to the target combiner impls.
2036 TargetLowering::DAGCombinerInfo
2037 DagCombineInfo(DAG, Level, false, this);
2038
2039 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2040 }
2041 }
2042
2043 // If nothing happened still, try promoting the operation.
2044 if (!RV.getNode()) {
2045 switch (N->getOpcode()) {
2046 default: break;
2047 case ISD::ADD:
2048 case ISD::SUB:
2049 case ISD::MUL:
2050 case ISD::AND:
2051 case ISD::OR:
2052 case ISD::XOR:
2053 RV = PromoteIntBinOp(SDValue(N, 0));
2054 break;
2055 case ISD::SHL:
2056 case ISD::SRA:
2057 case ISD::SRL:
2058 RV = PromoteIntShiftOp(SDValue(N, 0));
2059 break;
2060 case ISD::SIGN_EXTEND:
2061 case ISD::ZERO_EXTEND:
2062 case ISD::ANY_EXTEND:
2063 RV = PromoteExtend(SDValue(N, 0));
2064 break;
2065 case ISD::LOAD:
2066 if (PromoteLoad(SDValue(N, 0)))
2067 RV = SDValue(N, 0);
2068 break;
2069 }
2070 }
2071
2072 // If N is a commutative binary node, try to eliminate it if the commuted
2073 // version is already present in the DAG.
2074 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2075 SDValue N0 = N->getOperand(0);
2076 SDValue N1 = N->getOperand(1);
2077
2078 // Constant operands are canonicalized to RHS.
2079 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2080 SDValue Ops[] = {N1, N0};
2081 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2082 N->getFlags());
2083 if (CSENode)
2084 return SDValue(CSENode, 0);
2085 }
2086 }
2087
2088 return RV;
2089}
2090
2091/// Given a node, return its input chain if it has one, otherwise return a null
2092/// sd operand.
2093static SDValue getInputChainForNode(SDNode *N) {
2094 if (unsigned NumOps = N->getNumOperands()) {
2095 if (N->getOperand(0).getValueType() == MVT::Other)
2096 return N->getOperand(0);
2097 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098 return N->getOperand(NumOps-1);
2099 for (unsigned i = 1; i < NumOps-1; ++i)
2100 if (N->getOperand(i).getValueType() == MVT::Other)
2101 return N->getOperand(i);
2102 }
2103 return SDValue();
2104}
2105
2106SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2107 SDValue Operand = N->getOperand(0);
2108 EVT VT = Operand.getValueType();
2109 SDLoc dl(N);
2110
2111 // Canonicalize undef to quiet NaN.
2112 if (Operand.isUndef()) {
2113 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2114 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2115 }
2116 return SDValue();
2117}
2118
2119SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2120 // If N has two operands, where one has an input chain equal to the other,
2121 // the 'other' chain is redundant.
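 // e.g. TokenFactor(t1, X), where t1 is the chain result of a load whose
 // input chain is X, simplifies to t1: ordering after t1 already implies
 // ordering after X.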
2122 if (N->getNumOperands() == 2) {
2123 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2124 return N->getOperand(0);
2125 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2126 return N->getOperand(1);
2127 }
2128
2129 // Don't simplify token factors if optnone.
2130 if (OptLevel == CodeGenOptLevel::None)
2131 return SDValue();
2132
2133 // Don't simplify the token factor if the node itself has too many operands.
2134 if (N->getNumOperands() > TokenFactorInlineLimit)
2135 return SDValue();
2136
2137 // If the sole user is a token factor, we should make sure we have a
2138 // chance to merge them together. This prevents TF chains from inhibiting
2139 // optimizations.
2140 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2141 AddToWorklist(*(N->user_begin()));
2142
2143 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2144 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2145 SmallPtrSet<SDNode *, 16> SeenOps;
2146 bool Changed = false; // If we should replace this token factor.
2147
2148 // Start out with this token factor.
2149 TFs.push_back(N);
2150
2151 // Iterate through token factors. The TFs list grows when new token factors
2152 // are encountered.
2153 for (unsigned i = 0; i < TFs.size(); ++i) {
2154 // Limit number of nodes to inline, to avoid quadratic compile times.
2155 // We have to add the outstanding Token Factors to Ops, otherwise we might
2156 // drop Ops from the resulting Token Factors.
2157 if (Ops.size() > TokenFactorInlineLimit) {
2158 for (unsigned j = i; j < TFs.size(); j++)
2159 Ops.emplace_back(TFs[j], 0);
2160 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2161 // combiner worklist later.
2162 TFs.resize(i);
2163 break;
2164 }
2165
2166 SDNode *TF = TFs[i];
2167 // Check each of the operands.
2168 for (const SDValue &Op : TF->op_values()) {
2169 switch (Op.getOpcode()) {
2170 case ISD::EntryToken:
2171 // Entry tokens don't need to be added to the list. They are
2172 // redundant.
2173 Changed = true;
2174 break;
2175
2176 case ISD::TokenFactor:
2177 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2178 // Queue up for processing.
2179 TFs.push_back(Op.getNode());
2180 Changed = true;
2181 break;
2182 }
2183 [[fallthrough]];
2184
2185 default:
2186 // Only add if it isn't already in the list.
2187 if (SeenOps.insert(Op.getNode()).second)
2188 Ops.push_back(Op);
2189 else
2190 Changed = true;
2191 break;
2192 }
2193 }
2194 }
2195
2196 // Re-visit inlined Token Factors, to clean them up in case they have been
2197 // removed. Skip the first Token Factor, as this is the current node.
2198 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2199 AddToWorklist(TFs[i]);
2200
2201 // Remove Nodes that are chained to another node in the list. Do so
2202 // by walking up chains breadth-first, stopping when we've seen
2203 // another operand. In general we must climb to the EntryNode, but we can exit
2204 // early if we find all remaining work is associated with just one operand as
2205 // no further pruning is possible.
2206
2207 // List of nodes to search through and original Ops from which they originate.
2208 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2209 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2210 SmallPtrSet<SDNode *, 16> SeenChains;
2211 bool DidPruneOps = false;
2212
2213 unsigned NumLeftToConsider = 0;
2214 for (const SDValue &Op : Ops) {
2215 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2216 OpWorkCount.push_back(1);
2217 }
2218
2219 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2220 // If this is an Op, we can remove the op from the list. Re-mark any
2221 // search associated with it as coming from the current OpNumber.
2222 if (SeenOps.contains(Op)) {
2223 Changed = true;
2224 DidPruneOps = true;
2225 unsigned OrigOpNumber = 0;
2226 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2227 OrigOpNumber++;
2228 assert((OrigOpNumber != Ops.size()) &&
2229 "expected to find TokenFactor Operand");
2230 // Re-mark worklist from OrigOpNumber to OpNumber
2231 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2232 if (Worklist[i].second == OrigOpNumber) {
2233 Worklist[i].second = OpNumber;
2234 }
2235 }
2236 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2237 OpWorkCount[OrigOpNumber] = 0;
2238 NumLeftToConsider--;
2239 }
2240 // Add if it's a new chain
2241 if (SeenChains.insert(Op).second) {
2242 OpWorkCount[OpNumber]++;
2243 Worklist.push_back(std::make_pair(Op, OpNumber));
2244 }
2245 };
2246
2247 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2248 // We need to consider at least 2 Ops to prune.
2249 if (NumLeftToConsider <= 1)
2250 break;
2251 auto CurNode = Worklist[i].first;
2252 auto CurOpNumber = Worklist[i].second;
2253 assert((OpWorkCount[CurOpNumber] > 0) &&
2254 "Node should not appear in worklist");
2255 switch (CurNode->getOpcode()) {
2256 case ISD::EntryToken:
2257 // Hitting EntryToken is the only way for the search to terminate without
2258 // hitting another operand's search.
2259 // Prevent us from marking this operand
2260 // considered.
2261 NumLeftToConsider++;
2262 break;
2263 case ISD::TokenFactor:
2264 for (const SDValue &Op : CurNode->op_values())
2265 AddToWorklist(i, Op.getNode(), CurOpNumber);
2266 break;
2267 case ISD::LIFETIME_START:
2268 case ISD::LIFETIME_END:
2269 case ISD::CopyFromReg:
2270 case ISD::CopyToReg:
2271 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2272 break;
2273 default:
2274 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2275 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2276 break;
2277 }
2278 OpWorkCount[CurOpNumber]--;
2279 if (OpWorkCount[CurOpNumber] == 0)
2280 NumLeftToConsider--;
2281 }
2282
2283 // If we've changed things around then replace token factor.
2284 if (Changed) {
2285 SDValue Result;
2286 if (Ops.empty()) {
2287 // The entry token is the only possible outcome.
2288 Result = DAG.getEntryNode();
2289 } else {
2290 if (DidPruneOps) {
2291 SmallVector<SDValue, 8> PrunedOps;
2292 //
2293 for (const SDValue &Op : Ops) {
2294 if (SeenChains.count(Op.getNode()) == 0)
2295 PrunedOps.push_back(Op);
2296 }
2297 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2298 } else {
2299 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2300 }
2301 }
2302 return Result;
2303 }
2304 return SDValue();
2305}
2306
2307/// MERGE_VALUES can always be eliminated.
2308SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2309 WorklistRemover DeadNodes(*this);
2310 // Replacing results may cause a different MERGE_VALUES to suddenly
2311 // be CSE'd with N, and carry its uses with it. Iterate until no
2312 // uses remain, to ensure that the node can be safely deleted.
2313 // First add the users of this node to the work list so that they
2314 // can be tried again once they have new operands.
2315 AddUsersToWorklist(N);
2316 do {
2317 // Do as a single replacement to avoid rewalking use lists.
2318 SmallVector<SDValue, 8> Ops(N->ops());
2319 DAG.ReplaceAllUsesWith(N, Ops.data());
2320 } while (!N->use_empty());
2321 deleteAndRecombine(N);
2322 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2323}
2324
2325/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2326/// ConstantSDNode pointer else nullptr.
2327static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2328 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2329 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2330}
2331
2332// isTruncateOf - If N is a truncate of some other value, return true, record
2333// the value being truncated in Op and which of Op's bits are zero/one in Known.
2334// This function computes KnownBits to avoid a duplicated call to
2335// computeKnownBits in the caller.
2336static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2337 KnownBits &Known) {
2338 if (N->getOpcode() == ISD::TRUNCATE) {
2339 Op = N->getOperand(0);
2340 Known = DAG.computeKnownBits(Op);
2341 if (N->getFlags().hasNoUnsignedWrap())
2342 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2343 return true;
2344 }
2345
2346 if (N.getValueType().getScalarType() != MVT::i1 ||
2347 !sd_match(
2348 N, m_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2349 return false;
2350
2351 Known = DAG.computeKnownBits(Op);
2352 return (Known.Zero | 1).isAllOnes();
2353}
2354
2355/// Return true if 'Use' is a load or a store that uses N as its base pointer
2356/// and that N may be folded in the load / store addressing mode.
2357static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2358 const TargetLowering &TLI) {
2359 EVT VT;
2360 unsigned AS;
2361
2362 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2363 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2364 return false;
2365 VT = LD->getMemoryVT();
2366 AS = LD->getAddressSpace();
2367 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2368 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2369 return false;
2370 VT = ST->getMemoryVT();
2371 AS = ST->getAddressSpace();
2372 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2373 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2374 return false;
2375 VT = LD->getMemoryVT();
2376 AS = LD->getAddressSpace();
2377 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2378 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2379 return false;
2380 VT = ST->getMemoryVT();
2381 AS = ST->getAddressSpace();
2382 } else {
2383 return false;
2384 }
2385
2386 TargetLowering::AddrMode AM;
2387 if (N->getOpcode() == ISD::ADD) {
2388 AM.HasBaseReg = true;
2389 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2390 if (Offset)
2391 // [reg +/- imm]
2392 AM.BaseOffs = Offset->getSExtValue();
2393 else
2394 // [reg +/- reg]
2395 AM.Scale = 1;
2396 } else if (N->getOpcode() == ISD::SUB) {
2397 AM.HasBaseReg = true;
2398 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2399 if (Offset)
2400 // [reg +/- imm]
2401 AM.BaseOffs = -Offset->getSExtValue();
2402 else
2403 // [reg +/- reg]
2404 AM.Scale = 1;
2405 } else {
2406 return false;
2407 }
2408
2409 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2410 VT.getTypeForEVT(*DAG.getContext()), AS);
2411}
2412
2413/// This inverts a canonicalization in IR that replaces a variable select arm
2414/// with an identity constant. Codegen improves if we re-use the variable
2415/// operand rather than load a constant. This can also be converted into a
2416/// masked vector operation if the target supports it.
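/// For example (illustrative): add X, (vselect Cond, 0, Y)
///   --> vselect Cond, X, (add X, Y)
/// (with X frozen, since the transform increases the number of uses of X).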
2417static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2418 bool ShouldCommuteOperands) {
2419 // Match a select as operand 1. The identity constant that we are looking for
2420 // is only valid as operand 1 of a non-commutative binop.
2421 SDValue N0 = N->getOperand(0);
2422 SDValue N1 = N->getOperand(1);
2423 if (ShouldCommuteOperands)
2424 std::swap(N0, N1);
2425
2426 // TODO: Should this apply to scalar select too?
2427 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2428 return SDValue();
2429
2430 // We can't hoist all instructions because of immediate UB (not speculatable).
2431 // For example div/rem by zero.
2432 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2433 return SDValue();
2434
2435 unsigned Opcode = N->getOpcode();
2436 EVT VT = N->getValueType(0);
2437 SDValue Cond = N1.getOperand(0);
2438 SDValue TVal = N1.getOperand(1);
2439 SDValue FVal = N1.getOperand(2);
2440
2441 // This transform increases uses of N0, so freeze it to be safe.
2442 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2443 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2444 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2445 SDValue F0 = DAG.getFreeze(N0);
2446 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2447 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2448 }
2449 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2450 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2451 SDValue F0 = DAG.getFreeze(N0);
2452 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2453 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2454 }
2455
2456 return SDValue();
2457}
2458
2459SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2460 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2461 "Unexpected binary operator");
2462
2463 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2464 auto BinOpcode = BO->getOpcode();
2465 EVT VT = BO->getValueType(0);
2466 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2467 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2468 return Sel;
2469
2470 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2471 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2472 return Sel;
2473 }
2474
2475 // Don't do this unless the old select is going away. We want to eliminate the
2476 // binary operator, not replace a binop with a select.
2477 // TODO: Handle ISD::SELECT_CC.
2478 unsigned SelOpNo = 0;
2479 SDValue Sel = BO->getOperand(0);
2480 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2481 SelOpNo = 1;
2482 Sel = BO->getOperand(1);
2483
2484 // Peek through trunc to shift amount type.
2485 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2486 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2487 // This is valid when the truncated bits of x are already zero.
2488 SDValue Op;
2489 KnownBits Known;
2490 if (isTruncateOf(DAG, Sel, Op, Known) &&
2491 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2492 Sel = Op;
2493 }
2494 }
2495
2496 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2497 return SDValue();
2498
2499 SDValue CT = Sel.getOperand(1);
2500 if (!isConstantOrConstantVector(CT, true) &&
2501 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2502 return SDValue();
2503
2504 SDValue CF = Sel.getOperand(2);
2505 if (!isConstantOrConstantVector(CF, true) &&
2506 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2507 return SDValue();
2508
2509 // Bail out if any constants are opaque because we can't constant fold those.
2510 // The exception is "and" and "or" with either 0 or -1 in which case we can
2511 // propagate non constant operands into select. I.e.:
2512 // and (select Cond, 0, -1), X --> select Cond, 0, X
2513 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2514 bool CanFoldNonConst =
2515 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2516 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2517 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2518
2519 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2520 if (!CanFoldNonConst &&
2521 !isConstantOrConstantVector(CBO, true) &&
2522 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2523 return SDValue();
2524
2525 SDLoc DL(Sel);
2526 SDValue NewCT, NewCF;
2527
2528 if (CanFoldNonConst) {
2529 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2530 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2531 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2532 NewCT = CT;
2533 else
2534 NewCT = CBO;
2535
2536 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2537 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2538 NewCF = CF;
2539 else
2540 NewCF = CBO;
2541 } else {
2542 // We have a select-of-constants followed by a binary operator with a
2543 // constant. Eliminate the binop by pulling the constant math into the
2544 // select. Example:
2545 // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2546 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2547 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2548 if (!NewCT)
2549 return SDValue();
2550
2551 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2552 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2553 if (!NewCF)
2554 return SDValue();
2555 }
2556
2557 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2558 SelectOp->setFlags(BO->getFlags());
2559 return SelectOp;
2560}
2561
2562static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2563 SelectionDAG &DAG) {
2564 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2565 "Expecting add or sub");
2566
2567 // Match a constant operand and a zext operand for the math instruction:
2568 // add Z, C
2569 // sub C, Z
2570 bool IsAdd = N->getOpcode() == ISD::ADD;
2571 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2572 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2573 auto *CN = dyn_cast<ConstantSDNode>(C);
2574 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2575 return SDValue();
2576
2577 // Match the zext operand as a setcc of a boolean.
2578 if (Z.getOperand(0).getValueType() != MVT::i1)
2579 return SDValue();
2580
2581 // Match the compare as: setcc (X & 1), 0, eq.
2582 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2583 m_SpecificCondCode(ISD::SETEQ))))
2584 return SDValue();
2585
2586 // We are adding/subtracting a constant and an inverted low bit. Turn that
2587 // into a subtract/add of the low bit with incremented/decremented constant:
2588 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2589 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2590 EVT VT = C.getValueType();
2591 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2592 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2593 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2594 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2595}
2596
2597// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
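// For the unsigned form this follows from A + B == 2 * (A | B) - (A ^ B), so
// (A | B) - ((A ^ B) >> 1) == ceil((A + B) / 2) without the intermediate sum
// overflowing; the arithmetic-shift variant is the signed counterpart.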
2598SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2599 SDValue N0 = N->getOperand(0);
2600 EVT VT = N0.getValueType();
2601 SDValue A, B;
2602
2603 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2604 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2605 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2606 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2607 }
2608 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2609 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2610 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2611 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2612 }
2613 return SDValue();
2614}
2615
2616/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2617/// a shift and add with a different constant.
2618static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2619 SelectionDAG &DAG) {
2620 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2621 "Expecting add or sub");
2622
2623 // We need a constant operand for the add/sub, and the other operand is a
2624 // logical shift right: add (srl), C or sub C, (srl).
2625 bool IsAdd = N->getOpcode() == ISD::ADD;
2626 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2627 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2628 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2629 ShiftOp.getOpcode() != ISD::SRL)
2630 return SDValue();
2631
2632 // The shift must be of a 'not' value.
2633 SDValue Not = ShiftOp.getOperand(0);
2634 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2635 return SDValue();
2636
2637 // The shift must be moving the sign bit to the least-significant-bit.
2638 EVT VT = ShiftOp.getValueType();
2639 SDValue ShAmt = ShiftOp.getOperand(1);
2640 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2641 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2642 return SDValue();
2643
2644 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2645 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2646 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
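 // Both folds rely on (srl (not X), 31) == 1 - (srl X, 31) and
 // (sra X, 31) == 0 - (srl X, 31) (shown here for 32 bits), so only the
 // constant needs adjusting.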
2647 if (SDValue NewC = DAG.FoldConstantArithmetic(
2648 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2649 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2650 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2651 Not.getOperand(0), ShAmt);
2652 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2653 }
2654
2655 return SDValue();
2656}
2657
2658static bool
2659areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2660 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2661 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2662}
2663
2664/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2665/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2666/// are no common bits set in the operands).
2667SDValue DAGCombiner::visitADDLike(SDNode *N) {
2668 SDValue N0 = N->getOperand(0);
2669 SDValue N1 = N->getOperand(1);
2670 EVT VT = N0.getValueType();
2671 SDLoc DL(N);
2672
2673 // fold (add x, undef) -> undef
2674 if (N0.isUndef())
2675 return N0;
2676 if (N1.isUndef())
2677 return N1;
2678
2679 // fold (add c1, c2) -> c1+c2
2680 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2681 return C;
2682
2683 // canonicalize constant to RHS
2684 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2685 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2686 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2687
2688 if (areBitwiseNotOfEachother(N0, N1))
2689 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2690
2691 // fold vector ops
2692 if (VT.isVector()) {
2693 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2694 return FoldedVOp;
2695
2696 // fold (add x, 0) -> x, vector edition
2697 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2698 return N0;
2699 }
2700
2701 // fold (add x, 0) -> x
2702 if (isNullConstant(N1))
2703 return N0;
2704
2705 if (N0.getOpcode() == ISD::SUB) {
2706 SDValue N00 = N0.getOperand(0);
2707 SDValue N01 = N0.getOperand(1);
2708
2709 // fold ((A-c1)+c2) -> (A+(c2-c1))
2710 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2711 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2712
2713 // fold ((c1-A)+c2) -> (c1+c2)-A
2714 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2715 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2716 }
2717
2718 // add (sext i1 X), 1 -> zext (not i1 X)
2719 // We don't transform this pattern:
2720 // add (zext i1 X), -1 -> sext (not i1 X)
2721 // because most (?) targets generate better code for the zext form.
2722 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2723 isOneOrOneSplat(N1)) {
2724 SDValue X = N0.getOperand(0);
2725 if ((!LegalOperations ||
2726 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2727 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2728 X.getScalarValueSizeInBits() == 1) {
2729 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2730 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2731 }
2732 }
2733
2734 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2735 // iff (or x, c0) is equivalent to (add x, c0).
2736 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2737 // iff (xor x, c0) is equivalent to (add x, c0).
2738 if (DAG.isADDLike(N0)) {
2739 SDValue N01 = N0.getOperand(1);
2740 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2741 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2742 }
2743
2744 if (SDValue NewSel = foldBinOpIntoSelect(N))
2745 return NewSel;
2746
2747 // reassociate add
2748 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2749 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2750 return RADD;
2751
2752 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2753 // equivalent to (add x, c).
2754 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2755 // equivalent to (add x, c).
2756 // Do this optimization only when adding c does not introduce instructions
2757 // for adding carries.
2758 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2759 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2760 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2761 // If N0's type does not split or is a sign mask, it does not introduce
2762 // add carry.
2763 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2764 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2765 TyActn == TargetLoweringBase::TypePromoteInteger ||
2766 isMinSignedConstant(N0.getOperand(1));
2767 if (NoAddCarry)
2768 return DAG.getNode(
2769 ISD::ADD, DL, VT,
2770 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2771 N0.getOperand(1));
2772 }
2773 return SDValue();
2774 };
2775 if (SDValue Add = ReassociateAddOr(N0, N1))
2776 return Add;
2777 if (SDValue Add = ReassociateAddOr(N1, N0))
2778 return Add;
2779
2780 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2781 if (SDValue SD =
2782 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2783 return SD;
2784 }
2785
2786 SDValue A, B, C, D;
2787
2788 // fold ((0-A) + B) -> B-A
2789 if (sd_match(N0, m_Neg(m_Value(A))))
2790 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2791
2792 // fold (A + (0-B)) -> A-B
2793 if (sd_match(N1, m_Neg(m_Value(B))))
2794 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2795
2796 // fold (A+(B-A)) -> B
2797 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2798 return B;
2799
2800 // fold ((B-A)+A) -> B
2801 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2802 return B;
2803
2804 // fold ((A-B)+(C-A)) -> (C-B)
2805 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2806 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2807 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2808
2809 // fold ((A-B)+(B-C)) -> (A-C)
2810 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2811 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2812 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2813
2814 // fold (A+(B-(A+C))) to (B-C)
2815 // fold (A+(B-(C+A))) to (B-C)
2816 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2817 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2818
2819 // fold (A+((B-A)+or-C)) to (B+or-C)
2820 if (sd_match(N1,
2821 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2822 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2823 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2824
2825 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2826 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2827 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2828 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2829 return DAG.getNode(ISD::SUB, DL, VT,
2830 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2831 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2832
2833 // fold (add (umax X, C), -C) --> (usubsat X, C)
2834 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2835 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2836 return (!Max && !Op) ||
2837 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2838 };
2839 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2840 /*AllowUndefs*/ true))
2841 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2842 N0.getOperand(1));
2843 }
2844
2845 if (SimplifyDemandedBits(SDValue(N, 0)))
2846 return SDValue(N, 0);
2847
2848 if (isOneOrOneSplat(N1)) {
2849 // fold (add (xor a, -1), 1) -> (sub 0, a)
2850 if (isBitwiseNot(N0))
2851 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2852 N0.getOperand(0));
2853
2854 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2855 if (N0.getOpcode() == ISD::ADD) {
2856 SDValue A, Xor;
2857
2858 if (isBitwiseNot(N0.getOperand(0))) {
2859 A = N0.getOperand(1);
2860 Xor = N0.getOperand(0);
2861 } else if (isBitwiseNot(N0.getOperand(1))) {
2862 A = N0.getOperand(0);
2863 Xor = N0.getOperand(1);
2864 }
2865
2866 if (Xor)
2867 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2868 }
2869
2870 // Look for:
2871 // add (add x, y), 1
2872 // And if the target does not like this form then turn into:
2873 // sub y, (xor x, -1)
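 // This relies on the identity (xor x, -1) == -x - 1, so
 // sub y, (xor x, -1) == x + y + 1.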
2874 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2875 N0.hasOneUse() &&
2876 // Limit this to after legalization if the add has wrap flags
2877 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2878 !N->getFlags().hasNoSignedWrap()))) {
2879 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2880 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2881 }
2882 }
2883
2884 // (x - y) + -1 -> add (xor y, -1), x
2885 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2886 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2887 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2888 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2889 }
2890
2891 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2892 // This can help if the inner add has multiple uses.
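 // e.g. (illustrative, with A = x, CA = 1, CM = 3, CB = 2):
 //   add (mul (add x, 1), 3), 2 --> add (mul x, 3), 5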
2893 APInt CM, CA;
2894 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2895 if (VT.getScalarSizeInBits() <= 64) {
2896 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2897 m_ConstInt(CM)))) &&
2898 TLI.isLegalAddImmediate(
2899 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2900 SDNodeFlags Flags;
2901 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2902 // are _also_ nsw the outputs can be too.
2903 if (N->getFlags().hasNoUnsignedWrap() &&
2904 N0->getFlags().hasNoUnsignedWrap() &&
2905 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2906 Flags.setNoUnsignedWrap(true);
2907 if (N->getFlags().hasNoSignedWrap() &&
2908 N0->getFlags().hasNoSignedWrap() &&
2909 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2910 Flags.setNoSignedWrap(true);
2911 }
2912 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2913 DAG.getConstant(CM, DL, VT), Flags);
2914 return DAG.getNode(
2915 ISD::ADD, DL, VT, Mul,
2916 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2917 }
2918 // Also look in case there is an intermediate add.
2919 if (sd_match(N0, m_OneUse(m_Add(
2920 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2921 m_ConstInt(CM))),
2922 m_Value(B)))) &&
2923 TLI.isLegalAddImmediate(
2924 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2925 SDNodeFlags Flags;
2926 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2927 // are _also_ nsw the outputs can be too.
2928 SDValue OMul =
2929 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2930 if (N->getFlags().hasNoUnsignedWrap() &&
2931 N0->getFlags().hasNoUnsignedWrap() &&
2932 OMul->getFlags().hasNoUnsignedWrap() &&
2933 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2934 Flags.setNoUnsignedWrap(true);
2935 if (N->getFlags().hasNoSignedWrap() &&
2936 N0->getFlags().hasNoSignedWrap() &&
2937 OMul->getFlags().hasNoSignedWrap() &&
2938 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2939 Flags.setNoSignedWrap(true);
2940 }
2941 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2942 DAG.getConstant(CM, DL, VT), Flags);
2943 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2944 return DAG.getNode(
2945 ISD::ADD, DL, VT, Add,
2946 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2947 }
2948 }
2949 }
2950
2951 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2952 return Combined;
2953
2954 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2955 return Combined;
2956
2957 return SDValue();
2958}
2959
2960// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
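// For the unsigned form this follows from A + B == 2 * (A & B) + (A ^ B), so
// (A & B) + ((A ^ B) >> 1) == floor((A + B) / 2) without the intermediate sum
// overflowing; the arithmetic-shift variant is the signed counterpart.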
2961SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2962 SDValue N0 = N->getOperand(0);
2963 EVT VT = N0.getValueType();
2964 SDValue A, B;
2965
2966 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2968 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2969 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2970 }
2971 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2973 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2974 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2975 }
2976
2977 return SDValue();
2978}
2979
2980SDValue DAGCombiner::visitADD(SDNode *N) {
2981 SDValue N0 = N->getOperand(0);
2982 SDValue N1 = N->getOperand(1);
2983 EVT VT = N0.getValueType();
2984 SDLoc DL(N);
2985
2986 if (SDValue Combined = visitADDLike(N))
2987 return Combined;
2988
2989 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2990 return V;
2991
2992 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2993 return V;
2994
2995 // Try to match AVGFLOOR fixedwidth pattern
2996 if (SDValue V = foldAddToAvg(N, DL))
2997 return V;
2998
2999 // fold (a+b) -> (a|b) iff a and b share no bits.
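 // e.g. (illustrative): add (shl x, 8), (and y, 255)
 //   --> or disjoint (shl x, 8), (and y, 255)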
3000 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3001 DAG.haveNoCommonBitsSet(N0, N1))
3002 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3003
3004 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3005 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3006 const APInt &C0 = N0->getConstantOperandAPInt(0);
3007 const APInt &C1 = N1->getConstantOperandAPInt(0);
3008 return DAG.getVScale(DL, VT, C0 + C1);
3009 }
3010
3011 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3012 if (N0.getOpcode() == ISD::ADD &&
3013 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3014 N1.getOpcode() == ISD::VSCALE) {
3015 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3016 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3017 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3018 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3019 }
3020
3021 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3022 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3023 N1.getOpcode() == ISD::STEP_VECTOR) {
3024 const APInt &C0 = N0->getConstantOperandAPInt(0);
3025 const APInt &C1 = N1->getConstantOperandAPInt(0);
3026 APInt NewStep = C0 + C1;
3027 return DAG.getStepVector(DL, VT, NewStep);
3028 }
3029
3030 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3031 if (N0.getOpcode() == ISD::ADD &&
3032 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3033 N1.getOpcode() == ISD::STEP_VECTOR) {
3034 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3035 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3036 APInt NewStep = SV0 + SV1;
3037 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3038 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3039 }
3040
3041 return SDValue();
3042}
3043
3044SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3045 unsigned Opcode = N->getOpcode();
3046 SDValue N0 = N->getOperand(0);
3047 SDValue N1 = N->getOperand(1);
3048 EVT VT = N0.getValueType();
3049 bool IsSigned = Opcode == ISD::SADDSAT;
3050 SDLoc DL(N);
3051
3052 // fold (add_sat x, undef) -> -1
3053 if (N0.isUndef() || N1.isUndef())
3054 return DAG.getAllOnesConstant(DL, VT);
3055
3056 // fold (add_sat c1, c2) -> c3
3057 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3058 return C;
3059
3060 // canonicalize constant to RHS
3061 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3062 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3063 return DAG.getNode(Opcode, DL, VT, N1, N0);
3064
3065 // fold vector ops
3066 if (VT.isVector()) {
3067 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3068 return FoldedVOp;
3069
3070 // fold (add_sat x, 0) -> x, vector edition
3071 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3072 return N0;
3073 }
3074
3075 // fold (add_sat x, 0) -> x
3076 if (isNullConstant(N1))
3077 return N0;
3078
3079 // If it cannot overflow, transform into an add.
3080 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3081 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3082
3083 return SDValue();
3084}
3085
3086static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3087 bool ForceCarryReconstruction = false) {
3088 bool Masked = false;
3089
3090 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3091 while (true) {
3092 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3093 V = V.getOperand(0);
3094 continue;
3095 }
3096
3097 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3098 if (ForceCarryReconstruction)
3099 return V;
3100
3101 Masked = true;
3102 V = V.getOperand(0);
3103 continue;
3104 }
3105
3106 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3107 return V;
3108
3109 break;
3110 }
3111
3112 // If this is not a carry, return.
3113 if (V.getResNo() != 1)
3114 return SDValue();
3115
3116 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3117 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3118 return SDValue();
3119
3120 EVT VT = V->getValueType(0);
3121 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3122 return SDValue();
3123
3124 // If the result is masked, then no matter what kind of bool it is we can
3125 // return. If it isn't, then we need to make sure the bool type is either 0 or
3126 // 1 and not other values.
3127 if (Masked ||
3128 TLI.getBooleanContents(V.getValueType()) ==
3129 TargetLowering::ZeroOrOneBooleanContent)
3130 return V;
3131
3132 return SDValue();
3133}
3134
3135/// Given the operands of an add/sub operation, see if the 2nd operand is a
3136/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3137/// the opcode and bypass the mask operation.
3138static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3139 SelectionDAG &DAG, const SDLoc &DL) {
3140 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3141 N1 = N1.getOperand(0);
3142
3143 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3144 return SDValue();
3145
3146 EVT VT = N0.getValueType();
3147 SDValue N10 = N1.getOperand(0);
3148 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3149 N10 = N10.getOperand(0);
3150
3151 if (N10.getValueType() != VT)
3152 return SDValue();
3153
3154 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3155 return SDValue();
3156
3157 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3158 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
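// This holds because X is known to be all sign bits (0 or -1), so (and X, 1)
// is 0 or 1, i.e. exactly -X.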
3159 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3160}
3161
3162/// Helper for doing combines based on N0 and N1 being added to each other.
3163SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3164 SDNode *LocReference) {
3165 EVT VT = N0.getValueType();
3166 SDLoc DL(LocReference);
3167
3168 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3169 SDValue Y, N;
3170 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3171 return DAG.getNode(ISD::SUB, DL, VT, N0,
3172 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3173
3174 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3175 return V;
3176
3177 // Look for:
3178 // add (add x, 1), y
3179 // And if the target does not like this form then turn into:
3180 // sub y, (xor x, -1)
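// This is valid since ~x == -x - 1, so y - ~x == (x + 1) + y.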
3181 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3182 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3183 // Limit this to after legalization if the add has wrap flags
3184 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3185 !N0->getFlags().hasNoSignedWrap()))) {
3186 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3187 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3188 }
3189
3190 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3191 // Hoist one-use subtraction by non-opaque constant:
3192 // (x - C) + y -> (x + y) - C
3193 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3194 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3195 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3196 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3197 }
3198 // Hoist one-use subtraction from non-opaque constant:
3199 // (C - x) + y -> (y - x) + C
3200 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3201 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3202 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3203 }
3204 }
3205
3206 // add (mul x, C), x -> mul x, C+1
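// e.g. (x * 7) + x --> x * 8, which can then be lowered as a shift.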
3207 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3208 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3209 N0.hasOneUse()) {
3210 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3211 DAG.getConstant(1, DL, VT));
3212 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3213 }
3214
3215 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3216 // rather than 'add 0/-1' (the zext should get folded).
3217 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3218 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3219 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3220 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3221 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3222 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3223 }
3224
3225 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3226 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3227 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3228 if (TN->getVT() == MVT::i1) {
3229 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3230 DAG.getConstant(1, DL, VT));
3231 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3232 }
3233 }
3234
3235 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3236 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3237 N1.getResNo() == 0)
3238 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3239 N0, N1.getOperand(0), N1.getOperand(2));
3240
3241 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3242 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3243 if (SDValue Carry = getAsCarry(TLI, N1))
3244 return DAG.getNode(ISD::UADDO_CARRY, DL,
3245 DAG.getVTList(VT, Carry.getValueType()), N0,
3246 DAG.getConstant(0, DL, VT), Carry);
3247
3248 return SDValue();
3249}
3250
3251SDValue DAGCombiner::visitADDC(SDNode *N) {
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3254 EVT VT = N0.getValueType();
3255 SDLoc DL(N);
3256
3257 // If the flag result is dead, turn this into an ADD.
3258 if (!N->hasAnyUseOfValue(1))
3259 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3260 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3261
3262 // canonicalize constant to RHS.
3263 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3264 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3265 if (N0C && !N1C)
3266 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3267
3268 // fold (addc x, 0) -> x + no carry out
3269 if (isNullConstant(N1))
3270 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3271 DL, MVT::Glue));
3272
3273 // If it cannot overflow, transform into an add.
3274 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3275 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3276 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3277
3278 return SDValue();
3279}
3280
3281/**
3282 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3283 * then the flip also occurs if computing the inverse is the same cost.
3284 * This function returns an empty SDValue in case it cannot flip the boolean
3285 * without increasing the cost of the computation. If you want to flip a boolean
3286 * no matter what, use DAG.getLogicalNOT.
3287 */
3288 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3289 const TargetLowering &TLI,
3290 bool Force) {
3291 if (Force && isa<ConstantSDNode>(V))
3292 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3293
3294 if (V.getOpcode() != ISD::XOR)
3295 return SDValue();
3296
3297 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3298 return V.getOperand(0);
3299 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3300 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3301 return SDValue();
3302}
3303
3304SDValue DAGCombiner::visitADDO(SDNode *N) {
3305 SDValue N0 = N->getOperand(0);
3306 SDValue N1 = N->getOperand(1);
3307 EVT VT = N0.getValueType();
3308 bool IsSigned = (ISD::SADDO == N->getOpcode());
3309
3310 EVT CarryVT = N->getValueType(1);
3311 SDLoc DL(N);
3312
3313 // If the flag result is dead, turn this into an ADD.
3314 if (!N->hasAnyUseOfValue(1))
3315 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3316 DAG.getUNDEF(CarryVT));
3317
3318 // canonicalize constant to RHS.
3319 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3320 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3321 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3322
3323 // fold (addo x, 0) -> x + no carry out
3324 if (isNullOrNullSplat(N1))
3325 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3326
3327 // If it cannot overflow, transform into an add.
3328 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3329 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3330 DAG.getConstant(0, DL, CarryVT));
3331
3332 if (IsSigned) {
3333 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3334 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3335 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3336 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3337 } else {
3338 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
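// ~a + 1 == -a; the uaddo carries only when a == 0, while the usubo borrows
// whenever a != 0, hence the flipped carry.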
3339 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3340 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3341 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3342 return CombineTo(
3343 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3344 }
3345
3346 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3347 return Combined;
3348
3349 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3350 return Combined;
3351 }
3352
3353 return SDValue();
3354}
3355
3356SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3357 EVT VT = N0.getValueType();
3358 if (VT.isVector())
3359 return SDValue();
3360
3361 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3362 // If Y + 1 cannot overflow.
3363 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3364 SDValue Y = N1.getOperand(0);
3365 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3366 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3367 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3368 N1.getOperand(2));
3369 }
3370
3371 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3372 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3373 if (SDValue Carry = getAsCarry(TLI, N1))
3374 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3375 DAG.getConstant(0, SDLoc(N), VT), Carry);
3376
3377 return SDValue();
3378}
3379
3380SDValue DAGCombiner::visitADDE(SDNode *N) {
3381 SDValue N0 = N->getOperand(0);
3382 SDValue N1 = N->getOperand(1);
3383 SDValue CarryIn = N->getOperand(2);
3384
3385 // canonicalize constant to RHS
3386 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3387 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3388 if (N0C && !N1C)
3389 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3390 N1, N0, CarryIn);
3391
3392 // fold (adde x, y, false) -> (addc x, y)
3393 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3394 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3395
3396 return SDValue();
3397}
3398
3399SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3400 SDValue N0 = N->getOperand(0);
3401 SDValue N1 = N->getOperand(1);
3402 SDValue CarryIn = N->getOperand(2);
3403 SDLoc DL(N);
3404
3405 // canonicalize constant to RHS
3406 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408 if (N0C && !N1C)
3409 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3410
3411 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3412 if (isNullConstant(CarryIn)) {
3413 if (!LegalOperations ||
3414 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3416 }
3417
3418 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
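// 0 + 0 + carry can never produce a carry-out, and the sum is just the
// carry-in bit itself.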
3419 if (isNullConstant(N0) && isNullConstant(N1)) {
3420 EVT VT = N0.getValueType();
3421 EVT CarryVT = CarryIn.getValueType();
3422 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3423 AddToWorklist(CarryExt.getNode());
3424 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3425 DAG.getConstant(1, DL, VT)),
3426 DAG.getConstant(0, DL, CarryVT));
3427 }
3428
3429 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3430 return Combined;
3431
3432 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3433 return Combined;
3434
3435 // We want to avoid useless duplication.
3436 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3437 // not a binary operation, it is not really possible to leverage this
3438 // existing mechanism for it. However, if more operations require the same
3439 // deduplication logic, then it may be worth generalizing it.
3440 SDValue Ops[] = {N1, N0, CarryIn};
3441 SDNode *CSENode =
3442 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3443 if (CSENode)
3444 return SDValue(CSENode, 0);
3445
3446 return SDValue();
3447}
3448
3449/**
3450 * If we are facing some sort of diamond carry propagation pattern try to
3451 * break it up to generate something like:
3452 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3453 *
3454 * The end result is usually an increase in the number of operations required, but because the
3455 * carry is now linearized, other transforms can kick in and optimize the DAG.
3456 *
3457 * Patterns typically look something like
3458 * (uaddo A, B)
3459 * / \
3460 * Carry Sum
3461 * | \
3462 * | (uaddo_carry *, 0, Z)
3463 * | /
3464 * \ Carry
3465 * | /
3466 * (uaddo_carry X, *, *)
3467 *
3468 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3469 * produce a combine with a single path for carry propagation.
3470 */
3471 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3472 SelectionDAG &DAG, SDValue X,
3473 SDValue Carry0, SDValue Carry1,
3474 SDNode *N) {
3475 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3476 return SDValue();
3477 if (Carry1.getOpcode() != ISD::UADDO)
3478 return SDValue();
3479
3480 SDValue Z;
3481
3482 /**
3483 * First look for a suitable Z. It will present itself in the form of
3484 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3485 */
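// (uaddo Y, 1) produces a carry exactly when Y is the all-ones value, which
// matches (uaddo_carry Y, 0, Z) with Z = 1.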
3486 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3487 isNullConstant(Carry0.getOperand(1))) {
3488 Z = Carry0.getOperand(2);
3489 } else if (Carry0.getOpcode() == ISD::UADDO &&
3490 isOneConstant(Carry0.getOperand(1))) {
3491 EVT VT = Carry0->getValueType(1);
3492 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3493 } else {
3494 // We couldn't find a suitable Z.
3495 return SDValue();
3496 }
3497
3498
3499 auto cancelDiamond = [&](SDValue A,SDValue B) {
3500 SDLoc DL(N);
3501 SDValue NewY =
3502 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3503 Combiner.AddToWorklist(NewY.getNode());
3504 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3505 DAG.getConstant(0, DL, X.getValueType()),
3506 NewY.getValue(1));
3507 };
3508
3509 /**
3510 * (uaddo A, B)
3511 * |
3512 * Sum
3513 * |
3514 * (uaddo_carry *, 0, Z)
3515 */
3516 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3517 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3518 }
3519
3520 /**
3521 * (uaddo_carry A, 0, Z)
3522 * |
3523 * Sum
3524 * |
3525 * (uaddo *, B)
3526 */
3527 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3528 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3529 }
3530
3531 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3532 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3533 }
3534
3535 return SDValue();
3536}
3537
3538// If we are facing some sort of diamond carry/borrow in/out pattern try to
3539// match patterns like:
3540//
3541// (uaddo A, B) CarryIn
3542// | \ |
3543// | \ |
3544// PartialSum PartialCarryOutX /
3545// | | /
3546// | ____|____________/
3547// | / |
3548// (uaddo *, *) \________
3549// | \ \
3550// | \ |
3551// | PartialCarryOutY |
3552// | \ |
3553// | \ /
3554// AddCarrySum | ______/
3555// | /
3556// CarryOut = (or *, *)
3557//
3558// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3559//
3560// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3561//
3562// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3563// with a single path for carry/borrow out propagation.
3564 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3565 SDValue N0, SDValue N1, SDNode *N) {
3566 SDValue Carry0 = getAsCarry(TLI, N0);
3567 if (!Carry0)
3568 return SDValue();
3569 SDValue Carry1 = getAsCarry(TLI, N1);
3570 if (!Carry1)
3571 return SDValue();
3572
3573 unsigned Opcode = Carry0.getOpcode();
3574 if (Opcode != Carry1.getOpcode())
3575 return SDValue();
3576 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3577 return SDValue();
3578 // Guarantee identical type of CarryOut
3579 EVT CarryOutType = N->getValueType(0);
3580 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3581 CarryOutType != Carry1.getValue(1).getValueType())
3582 return SDValue();
3583
3584 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3585 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3586 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3587 std::swap(Carry0, Carry1);
3588
3589 // Check if nodes are connected in expected way.
3590 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3591 Carry1.getOperand(1) != Carry0.getValue(0))
3592 return SDValue();
3593
3594 // The carry in value must be on the righthand side for subtraction.
3595 unsigned CarryInOperandNum =
3596 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3597 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3598 return SDValue();
3599 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3600
3601 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3602 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3603 return SDValue();
3604
3605 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3606 CarryIn = getAsCarry(TLI, CarryIn, true);
3607 if (!CarryIn)
3608 return SDValue();
3609
3610 SDLoc DL(N);
3611 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3612 Carry1->getValueType(0));
3613 SDValue Merged =
3614 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3615 Carry0.getOperand(1), CarryIn);
3616
3617 // Please note that because we have proven that the result of the UADDO/USUBO
3618 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3619 // therefore prove that if the first UADDO/USUBO overflows, the second
3620 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3621 // maximum value.
3622 //
3623 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3624 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3625 //
3626 // This is important because it means that OR and XOR can be used to merge
3627 // carry flags; and that AND can return a constant zero.
3628 //
3629 // TODO: match other operations that can merge flags (ADD, etc)
3630 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3631 if (N->getOpcode() == ISD::AND)
3632 return DAG.getConstant(0, DL, CarryOutType);
3633 return Merged.getValue(1);
3634}
3635
3636SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3637 SDValue CarryIn, SDNode *N) {
3638 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3639 // carry.
3640 if (isBitwiseNot(N0))
3641 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3642 SDLoc DL(N);
3643 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3644 N0.getOperand(0), NotC);
3645 return CombineTo(
3646 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3647 }
3648
3649 // Iff the flag result is dead:
3650 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3651 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3652 // or the dependency between the instructions.
3653 if ((N0.getOpcode() == ISD::ADD ||
3654 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3655 N0.getValue(1) != CarryIn)) &&
3656 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3657 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3658 N0.getOperand(0), N0.getOperand(1), CarryIn);
3659
3660 /**
3661 * When one of the uaddo_carry argument is itself a carry, we may be facing
3662 * a diamond carry propagation. In which case we try to transform the DAG
3663 * to ensure linear carry propagation if that is possible.
3664 */
3665 if (auto Y = getAsCarry(TLI, N1)) {
3666 // Because both are carries, Y and Z can be swapped.
3667 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3668 return R;
3669 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3670 return R;
3671 }
3672
3673 return SDValue();
3674}
3675
3676SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3677 SDValue CarryIn, SDNode *N) {
3678 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3679 if (isBitwiseNot(N0)) {
3680 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3681 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3682 N0.getOperand(0), NotC);
3683 }
3684
3685 return SDValue();
3686}
3687
3688SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3689 SDValue N0 = N->getOperand(0);
3690 SDValue N1 = N->getOperand(1);
3691 SDValue CarryIn = N->getOperand(2);
3692 SDLoc DL(N);
3693
3694 // canonicalize constant to RHS
3695 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3696 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3697 if (N0C && !N1C)
3698 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3699
3700 // fold (saddo_carry x, y, false) -> (saddo x, y)
3701 if (isNullConstant(CarryIn)) {
3702 if (!LegalOperations ||
3703 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3704 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3705 }
3706
3707 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3708 return Combined;
3709
3710 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3711 return Combined;
3712
3713 return SDValue();
3714}
3715
3716// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3717// clamp/truncation if necessary.
3718static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3719 SDValue RHS, SelectionDAG &DAG,
3720 const SDLoc &DL) {
3721 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3722 "Illegal truncation");
3723
3724 if (DstVT == SrcVT)
3725 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3726
3727 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3728 // clamping RHS.
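// For example with SrcVT = i16 and DstVT = i8: if the upper 8 bits of LHS are
// known zero, clamping RHS to at most 255 before truncating preserves the
// saturating-subtract result.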
3729 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3730 DstVT.getScalarSizeInBits());
3731 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3732 return SDValue();
3733
3734 SDValue SatLimit =
3735 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3736 DstVT.getScalarSizeInBits()),
3737 DL, SrcVT);
3738 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3739 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3740 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3741 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3742}
3743
3744// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3745// usubsat(a,b), optionally as a truncated type.
3746SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3747 if (N->getOpcode() != ISD::SUB ||
3748 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3749 return SDValue();
3750
3751 EVT SubVT = N->getValueType(0);
3752 SDValue Op0 = N->getOperand(0);
3753 SDValue Op1 = N->getOperand(1);
3754
3755 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3756 // that may be converted to usubsat(a,b).
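// umax(a,b) - b is a - b when a >= b and 0 otherwise, which is exactly
// usubsat(a,b); the same holds for a - umin(a,b).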
3757 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3758 SDValue MaxLHS = Op0.getOperand(0);
3759 SDValue MaxRHS = Op0.getOperand(1);
3760 if (MaxLHS == Op1)
3761 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3762 if (MaxRHS == Op1)
3763 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3764 }
3765
3766 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3767 SDValue MinLHS = Op1.getOperand(0);
3768 SDValue MinRHS = Op1.getOperand(1);
3769 if (MinLHS == Op0)
3770 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3771 if (MinRHS == Op0)
3772 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3773 }
3774
3775 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3776 if (Op1.getOpcode() == ISD::TRUNCATE &&
3777 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3778 Op1.getOperand(0).hasOneUse()) {
3779 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3780 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3781 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3782 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3783 DAG, DL);
3784 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3785 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3786 DAG, DL);
3787 }
3788
3789 return SDValue();
3790}
3791
3792// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3793 // counting leading ones. Broadly, it replaces the subtraction with a left
3794// shift.
3795//
3796// * DAG Legalisation Pattern:
3797//
3798// (sub (ctlz (zeroextend (not Src)))
3799// BitWidthDiff)
3800//
3801// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3802// -->
3803//
3804// (ctlz_zero_undef (not (shl (anyextend Src)
3805// BitWidthDiff)))
3806//
3807// * Type Legalisation Pattern:
3808//
3809// (sub (ctlz (and (xor Src XorMask)
3810// AndMask))
3811// BitWidthDiff)
3812//
3813// if AndMask has only trailing ones
3814// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3815// and XorMask has more trailing ones than AndMask
3816// -->
3817//
3818// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
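// For example, counting leading ones of an i8 value promoted to i32:
// ctlz(zext(~x)) - 24 == ctlz_zero_undef(~(anyext(x) << 24)), since the low 24
// bits of the shifted value become ones after the NOT.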
3819template <class MatchContextClass>
3820 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3821 const SDLoc DL(N);
3822 SDValue N0 = N->getOperand(0);
3823 EVT VT = N0.getValueType();
3824 unsigned BitWidth = VT.getScalarSizeInBits();
3825
3826 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3827
3828 APInt AndMask;
3829 APInt XorMask;
3830 APInt BitWidthDiff;
3831
3832 SDValue CtlzOp;
3833 SDValue Src;
3834
3835 if (!sd_context_match(
3836 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3837 return SDValue();
3838
3839 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3840 // DAG Legalisation Pattern:
3841 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3842 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3843 return SDValue();
3844
3845 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3846 } else if (sd_context_match(CtlzOp, Matcher,
3847 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3848 m_ConstInt(AndMask)))) {
3849 // Type Legalisation Pattern:
3850 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3851 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3852 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3853 return SDValue();
3854 } else
3855 return SDValue();
3856
3857 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
3858 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
3859 SDValue Not =
3860 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
3861
3862 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
3863}
3864
3865// Since it may not be valid to emit a fold to zero for vector initializers
3866// check if we can before folding.
3867static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3868 SelectionDAG &DAG, bool LegalOperations) {
3869 if (!VT.isVector())
3870 return DAG.getConstant(0, DL, VT);
3871 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3872 return DAG.getConstant(0, DL, VT);
3873 return SDValue();
3874}
3875
3876SDValue DAGCombiner::visitSUB(SDNode *N) {
3877 SDValue N0 = N->getOperand(0);
3878 SDValue N1 = N->getOperand(1);
3879 EVT VT = N0.getValueType();
3880 unsigned BitWidth = VT.getScalarSizeInBits();
3881 SDLoc DL(N);
3882
3883 auto PeekThroughFreeze = [](SDValue N) {
3884 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3885 return N->getOperand(0);
3886 return N;
3887 };
3888
3889 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
3890 return V;
3891
3892 // fold (sub x, x) -> 0
3893 // FIXME: Refactor this and xor and other similar operations together.
3894 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3895 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3896
3897 // fold (sub c1, c2) -> c3
3898 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3899 return C;
3900
3901 // fold vector ops
3902 if (VT.isVector()) {
3903 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3904 return FoldedVOp;
3905
3906 // fold (sub x, 0) -> x, vector edition
3907 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3908 return N0;
3909 }
3910
3911 if (SDValue NewSel = foldBinOpIntoSelect(N))
3912 return NewSel;
3913
3914 // fold (sub x, c) -> (add x, -c)
3915 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3916 return DAG.getNode(ISD::ADD, DL, VT, N0,
3917 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3918
3919 if (isNullOrNullSplat(N0)) {
3920 // Right-shifting everything out but the sign bit followed by negation is
3921 // the same as flipping arithmetic/logical shift type without the negation:
3922 // -(X >>u 31) -> (X >>s 31)
3923 // -(X >>s 31) -> (X >>u 31)
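// For i32, X >>u 31 is 0 or 1 and X >>s 31 is 0 or -1 depending on the sign
// bit, so negating one form yields the other.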
3924 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3925 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3926 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3927 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3928 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3929 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3930 }
3931 }
3932
3933 // 0 - X --> 0 if the sub is NUW.
3934 if (N->getFlags().hasNoUnsignedWrap())
3935 return N0;
3936
3937 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3938 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3939 // N1 must be 0 because negating the minimum signed value is undefined.
3940 if (N->getFlags().hasNoSignedWrap())
3941 return N0;
3942
3943 // 0 - X --> X if X is 0 or the minimum signed value.
3944 return N1;
3945 }
3946
3947 // Convert 0 - abs(x).
3948 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3949 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3950 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3951 return Result;
3952
3953 // Similar to the previous rule, but this time targeting an expanded abs.
3954 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3955 // as well as
3956 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3957 // Note that these two are applicable to both signed and unsigned min/max.
3958 SDValue X;
3959 SDValue S0;
3960 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
3961 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3962 m_UMax(m_Value(X), NegPat),
3963 m_SMin(m_Value(X), NegPat),
3964 m_UMin(m_Value(X), NegPat))))) {
3965 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3966 if (hasOperation(NewOpc, VT))
3967 return DAG.getNode(NewOpc, DL, VT, X, S0);
3968 }
3969
3970 // Fold neg(splat(neg(x)) -> splat(x)
3971 if (VT.isVector()) {
3972 SDValue N1S = DAG.getSplatValue(N1, true);
3973 if (N1S && N1S.getOpcode() == ISD::SUB &&
3974 isNullConstant(N1S.getOperand(0)))
3975 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3976 }
3977 }
3978
3979 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3980 if (isAllOnesOrAllOnesSplat(N0))
3981 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3982
3983 // fold (A - (0-B)) -> A+B
3984 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3985 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3986
3987 // fold A-(A-B) -> B
3988 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3989 return N1.getOperand(1);
3990
3991 // fold (A+B)-A -> B
3992 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3993 return N0.getOperand(1);
3994
3995 // fold (A+B)-B -> A
3996 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3997 return N0.getOperand(0);
3998
3999 // fold (A+C1)-C2 -> A+(C1-C2)
4000 if (N0.getOpcode() == ISD::ADD) {
4001 SDValue N01 = N0.getOperand(1);
4002 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4003 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4004 }
4005
4006 // fold C2-(A+C1) -> (C2-C1)-A
4007 if (N1.getOpcode() == ISD::ADD) {
4008 SDValue N11 = N1.getOperand(1);
4009 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4010 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4011 }
4012
4013 // fold (A-C1)-C2 -> A-(C1+C2)
4014 if (N0.getOpcode() == ISD::SUB) {
4015 SDValue N01 = N0.getOperand(1);
4016 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4017 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4018 }
4019
4020 // fold (c1-A)-c2 -> (c1-c2)-A
4021 if (N0.getOpcode() == ISD::SUB) {
4022 SDValue N00 = N0.getOperand(0);
4023 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4024 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4025 }
4026
4027 SDValue A, B, C;
4028
4029 // fold ((A+(B+C))-B) -> A+C
4030 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4031 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4032
4033 // fold ((A+(B-C))-B) -> A-C
4034 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4035 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4036
4037 // fold ((A-(B-C))-C) -> A-B
4038 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4039 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4040
4041 // fold (A-(B-C)) -> A+(C-B)
4042 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4043 return DAG.getNode(ISD::ADD, DL, VT, N0,
4044 DAG.getNode(ISD::SUB, DL, VT, C, B));
4045
4046 // A - (A & B) -> A & (~B)
4047 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4048 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4049 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4050
4051 // fold (A - (-B * C)) -> (A + (B * C))
4052 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4053 return DAG.getNode(ISD::ADD, DL, VT, N0,
4054 DAG.getNode(ISD::MUL, DL, VT, B, C));
4055
4056 // If either operand of a sub is undef, the result is undef
4057 if (N0.isUndef())
4058 return N0;
4059 if (N1.isUndef())
4060 return N1;
4061
4062 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4063 return V;
4064
4065 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4066 return V;
4067
4068 // Try to match AVGCEIL fixedwidth pattern
4069 if (SDValue V = foldSubToAvg(N, DL))
4070 return V;
4071
4072 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4073 return V;
4074
4075 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4076 return V;
4077
4078 // (A - B) - 1 -> add (xor B, -1), A
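// This holds because ~B == -B - 1, so A + ~B == (A - B) - 1.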
4079 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
4080 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4081
4082 // Look for:
4083 // sub y, (xor x, -1)
4084 // And if the target does not like this form then turn into:
4085 // add (add x, y), 1
4086 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4087 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4088 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4089 }
4090
4091 // Hoist one-use addition by non-opaque constant:
4092 // (x + C) - y -> (x - y) + C
4093 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4094 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4095 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4096 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4097 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4098 }
4099 // y - (x + C) -> (y - x) - C
4100 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4101 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4102 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4103 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4104 }
4105 // (x - C) - y -> (x - y) - C
4106 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4107 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4108 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4109 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4110 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4111 }
4112 // (C - x) - y -> C - (x + y)
4113 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4114 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4115 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4116 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4117 }
4118
4119 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4120 // rather than 'sub 0/1' (the sext should get folded).
4121 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4122 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4123 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4124 TLI.getBooleanContents(VT) ==
4125 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4126 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4127 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4128 }
4129
4130 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4131 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4132 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4133 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4134 return DAG.getNode(ISD::ABS, DL, VT, A);
4135
4136 // If the relocation model supports it, consider symbol offsets.
4137 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4138 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4139 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4140 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4141 if (GA->getGlobal() == GB->getGlobal())
4142 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4143 DL, VT);
4144 }
4145
4146 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4147 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4148 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4149 if (TN->getVT() == MVT::i1) {
4150 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4151 DAG.getConstant(1, DL, VT));
4152 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4153 }
4154 }
4155
4156 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4157 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4158 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4159 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4160 }
4161
4162 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4163 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4164 APInt NewStep = -N1.getConstantOperandAPInt(0);
4165 return DAG.getNode(ISD::ADD, DL, VT, N0,
4166 DAG.getStepVector(DL, VT, NewStep));
4167 }
4168
4169 // Prefer an add for more folding potential and possibly better codegen:
4170 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4171 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4172 SDValue ShAmt = N1.getOperand(1);
4173 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4174 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4175 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4176 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4177 }
4178 }
4179
4180 // As with the previous fold, prefer add for more folding potential.
4181 // Subtracting SMIN/0 is the same as adding SMIN/0:
4182 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4183 if (N1.getOpcode() == ISD::SHL) {
4184 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4185 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4186 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4187 }
4188
4189 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4190 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4191 N0.getResNo() == 0 && N0.hasOneUse())
4192 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4193 N0.getOperand(0), N1, N0.getOperand(2));
4194
4195 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4196 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4197 if (SDValue Carry = getAsCarry(TLI, N0)) {
4198 SDValue X = N1;
4199 SDValue Zero = DAG.getConstant(0, DL, VT);
4200 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4201 return DAG.getNode(ISD::UADDO_CARRY, DL,
4202 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4203 Carry);
4204 }
4205 }
4206
4207 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4208 // sub C0, X --> xor X, C0
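// e.g. C0 = 0b1100 and X known to have only bits 2 and 3 possibly set: every
// bit that may be set in X is also set in C0, so no borrow occurs and
// C0 - X == C0 ^ X.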
4209 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4210 if (!C0->isOpaque()) {
4211 const APInt &C0Val = C0->getAPIntValue();
4212 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4213 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4214 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4215 }
4216 }
4217
4218 // smax(a,b) - smin(a,b) --> abds(a,b)
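// e.g. a = 3, b = 7: smax(a,b) - smin(a,b) = 7 - 3 = 4 == |a - b|.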
4219 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4220 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4221 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4222 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4223
4224 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4225 if (hasOperation(ISD::ABDS, VT) &&
4226 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4227 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4228 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4229
4230 // umax(a,b) - umin(a,b) --> abdu(a,b)
4231 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4232 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4233 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4234 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4235
4236 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4237 if (hasOperation(ISD::ABDU, VT) &&
4238 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4239 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4240 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4241
4242 return SDValue();
4243}
4244
4245SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4246 unsigned Opcode = N->getOpcode();
4247 SDValue N0 = N->getOperand(0);
4248 SDValue N1 = N->getOperand(1);
4249 EVT VT = N0.getValueType();
4250 bool IsSigned = Opcode == ISD::SSUBSAT;
4251 SDLoc DL(N);
4252
4253 // fold (sub_sat x, undef) -> 0
4254 if (N0.isUndef() || N1.isUndef())
4255 return DAG.getConstant(0, DL, VT);
4256
4257 // fold (sub_sat x, x) -> 0
4258 if (N0 == N1)
4259 return DAG.getConstant(0, DL, VT);
4260
4261 // fold (sub_sat c1, c2) -> c3
4262 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4263 return C;
4264
4265 // fold vector ops
4266 if (VT.isVector()) {
4267 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4268 return FoldedVOp;
4269
4270 // fold (sub_sat x, 0) -> x, vector edition
4271 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4272 return N0;
4273 }
4274
4275 // fold (sub_sat x, 0) -> x
4276 if (isNullConstant(N1))
4277 return N0;
4278
4279 // If it cannot overflow, transform into a sub.
4280 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4281 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4282
4283 return SDValue();
4284}
4285
4286SDValue DAGCombiner::visitSUBC(SDNode *N) {
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4289 EVT VT = N0.getValueType();
4290 SDLoc DL(N);
4291
4292 // If the flag result is dead, turn this into an SUB.
4293 if (!N->hasAnyUseOfValue(1))
4294 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4295 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4296
4297 // fold (subc x, x) -> 0 + no borrow
4298 if (N0 == N1)
4299 return CombineTo(N, DAG.getConstant(0, DL, VT),
4300 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4301
4302 // fold (subc x, 0) -> x + no borrow
4303 if (isNullConstant(N1))
4304 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4305
4306 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4307 if (isAllOnesConstant(N0))
4308 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4309 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4310
4311 return SDValue();
4312}
4313
4314SDValue DAGCombiner::visitSUBO(SDNode *N) {
4315 SDValue N0 = N->getOperand(0);
4316 SDValue N1 = N->getOperand(1);
4317 EVT VT = N0.getValueType();
4318 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4319
4320 EVT CarryVT = N->getValueType(1);
4321 SDLoc DL(N);
4322
4323 // If the flag result is dead, turn this into an SUB.
4324 if (!N->hasAnyUseOfValue(1))
4325 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4326 DAG.getUNDEF(CarryVT));
4327
4328 // fold (subo x, x) -> 0 + no borrow
4329 if (N0 == N1)
4330 return CombineTo(N, DAG.getConstant(0, DL, VT),
4331 DAG.getConstant(0, DL, CarryVT));
4332
4333 // fold (subo x, c) -> (addo x, -c)
4334 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4335 if (IsSigned && !N1C->isMinSignedValue())
4336 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4337 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4338
4339 // fold (subo x, 0) -> x + no borrow
4340 if (isNullOrNullSplat(N1))
4341 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4342
4343 // If it cannot overflow, transform into a sub.
4344 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4345 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4346 DAG.getConstant(0, DL, CarryVT));
4347
4348 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4349 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4350 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4351 DAG.getConstant(0, DL, CarryVT));
4352
4353 return SDValue();
4354}
4355
4356SDValue DAGCombiner::visitSUBE(SDNode *N) {
4357 SDValue N0 = N->getOperand(0);
4358 SDValue N1 = N->getOperand(1);
4359 SDValue CarryIn = N->getOperand(2);
4360
4361 // fold (sube x, y, false) -> (subc x, y)
4362 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4363 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4364
4365 return SDValue();
4366}
4367
4368SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4369 SDValue N0 = N->getOperand(0);
4370 SDValue N1 = N->getOperand(1);
4371 SDValue CarryIn = N->getOperand(2);
4372
4373 // fold (usubo_carry x, y, false) -> (usubo x, y)
4374 if (isNullConstant(CarryIn)) {
4375 if (!LegalOperations ||
4376 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4377 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4378 }
4379
4380 return SDValue();
4381}
4382
4383SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4384 SDValue N0 = N->getOperand(0);
4385 SDValue N1 = N->getOperand(1);
4386 SDValue CarryIn = N->getOperand(2);
4387
4388 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4389 if (isNullConstant(CarryIn)) {
4390 if (!LegalOperations ||
4391 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4392 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4393 }
4394
4395 return SDValue();
4396}
4397
4398// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4399// UMULFIXSAT here.
4400SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4401 SDValue N0 = N->getOperand(0);
4402 SDValue N1 = N->getOperand(1);
4403 SDValue Scale = N->getOperand(2);
4404 EVT VT = N0.getValueType();
4405
4406 // fold (mulfix x, undef, scale) -> 0
4407 if (N0.isUndef() || N1.isUndef())
4408 return DAG.getConstant(0, SDLoc(N), VT);
4409
4410 // Canonicalize constant to RHS (vector doesn't have to splat)
4411 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4412 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4413 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4414
4415 // fold (mulfix x, 0, scale) -> 0
4416 if (isNullConstant(N1))
4417 return DAG.getConstant(0, SDLoc(N), VT);
4418
4419 return SDValue();
4420}
4421
4422template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4423 SDValue N0 = N->getOperand(0);
4424 SDValue N1 = N->getOperand(1);
4425 EVT VT = N0.getValueType();
4426 unsigned BitWidth = VT.getScalarSizeInBits();
4427 SDLoc DL(N);
4428 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4429 MatchContextClass Matcher(DAG, TLI, N);
4430
4431 // fold (mul x, undef) -> 0
4432 if (N0.isUndef() || N1.isUndef())
4433 return DAG.getConstant(0, DL, VT);
4434
4435 // fold (mul c1, c2) -> c1*c2
4436 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4437 return C;
4438
4439 // canonicalize constant to RHS (vector doesn't have to splat)
4440 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4441 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4442 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4443
4444 bool N1IsConst = false;
4445 bool N1IsOpaqueConst = false;
4446 APInt ConstValue1;
4447
4448 // fold vector ops
4449 if (VT.isVector()) {
4450 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4451 if (!UseVP)
4452 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4453 return FoldedVOp;
4454
4455 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4456 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4457 "Splat APInt should be element width");
4458 } else {
4459 N1IsConst = isa<ConstantSDNode>(N1);
4460 if (N1IsConst) {
4461 ConstValue1 = N1->getAsAPIntVal();
4462 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4463 }
4464 }
4465
4466 // fold (mul x, 0) -> 0
4467 if (N1IsConst && ConstValue1.isZero())
4468 return N1;
4469
4470 // fold (mul x, 1) -> x
4471 if (N1IsConst && ConstValue1.isOne())
4472 return N0;
4473
4474 if (!UseVP)
4475 if (SDValue NewSel = foldBinOpIntoSelect(N))
4476 return NewSel;
4477
4478 // fold (mul x, -1) -> 0-x
4479 if (N1IsConst && ConstValue1.isAllOnes())
4480 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4481
4482 // fold (mul x, (1 << c)) -> x << c
4483 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4484 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4485 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4486 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4487 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4488 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4489 }
4490 }
4491
4492 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
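// e.g. x * -8 --> 0 - (x << 3).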
4493 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4494 unsigned Log2Val = (-ConstValue1).logBase2();
4495
4496 // FIXME: If the input is something that is easily negated (e.g. a
4497 // single-use add), we should put the negate there.
4498 return Matcher.getNode(
4499 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4500 Matcher.getNode(ISD::SHL, DL, VT, N0,
4501 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4502 }
4503
4504 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4505 // hi result is in use in case we hit this mid-legalization.
4506 if (!UseVP) {
4507 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4508 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4509 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4510 // TODO: Can we match commutable operands with getNodeIfExists?
4511 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4512 if (LoHi->hasAnyUseOfValue(1))
4513 return SDValue(LoHi, 0);
4514 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4515 if (LoHi->hasAnyUseOfValue(1))
4516 return SDValue(LoHi, 0);
4517 }
4518 }
4519 }
4520
4521 // Try to transform:
4522 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4523 // mul x, (2^N + 1) --> add (shl x, N), x
4524 // mul x, (2^N - 1) --> sub (shl x, N), x
4525 // Examples: x * 33 --> (x << 5) + x
4526 // x * 15 --> (x << 4) - x
4527 // x * -33 --> -((x << 5) + x)
4528 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4529 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4530 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4531 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4532 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4533 // x * 0xf800 --> (x << 16) - (x << 11)
4534 // x * -0x8800 --> -((x << 15) + (x << 11))
4535 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4536 if (!UseVP && N1IsConst &&
4537 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4538 // TODO: We could handle more general decomposition of any constant by
4539 // having the target set a limit on number of ops and making a
4540 // callback to determine that sequence (similar to sqrt expansion).
4541 unsigned MathOp = ISD::DELETED_NODE;
4542 APInt MulC = ConstValue1.abs();
4543 // The constant `2` should be treated as (2^0 + 1).
4544 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4545 MulC.lshrInPlace(TZeros);
4546 if ((MulC - 1).isPowerOf2())
4547 MathOp = ISD::ADD;
4548 else if ((MulC + 1).isPowerOf2())
4549 MathOp = ISD::SUB;
4550
4551 if (MathOp != ISD::DELETED_NODE) {
4552 unsigned ShAmt =
4553 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4554 ShAmt += TZeros;
4555 assert(ShAmt < BitWidth &&
4556 "multiply-by-constant generated out of bounds shift");
4557 SDValue Shl =
4558 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4559 SDValue R =
4560 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4561 DAG.getNode(ISD::SHL, DL, VT, N0,
4562 DAG.getConstant(TZeros, DL, VT)))
4563 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4564 if (ConstValue1.isNegative())
4565 R = DAG.getNegative(R, DL, VT);
4566 return R;
4567 }
4568 }
4569
4570 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4571 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4572 SDValue N01 = N0.getOperand(1);
4573 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4574 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4575 }
4576
4577 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4578 // use.
4579 {
4580 SDValue Sh, Y;
4581
4582 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4583 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4584 isConstantOrConstantVector(N0.getOperand(1))) {
4585 Sh = N0; Y = N1;
4586 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4587 isConstantOrConstantVector(N1.getOperand(1))) {
4588 Sh = N1; Y = N0;
4589 }
4590
4591 if (Sh.getNode()) {
4592 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4593 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4594 }
4595 }
4596
4597 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4598 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4599 isConstantOrConstantVector(N1, /*NoOpaques=*/true) &&
4600 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
4601 isMulAddWithConstProfitable(N, N0, N1))
4602 return Matcher.getNode(
4603 ISD::ADD, DL, VT,
4604 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4605 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4606
4607 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4608 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4609 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4610 const APInt &C0 = N0.getConstantOperandAPInt(0);
4611 const APInt &C1 = NC1->getAPIntValue();
4612 return DAG.getVScale(DL, VT, C0 * C1);
4613 }
4614
4615 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4616 APInt MulVal;
4617 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4618 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4619 const APInt &C0 = N0.getConstantOperandAPInt(0);
4620 APInt NewStep = C0 * MulVal;
4621 return DAG.getStepVector(DL, VT, NewStep);
4622 }
4623
4624 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4625 SDValue X;
4626 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4627 sd_context_match(
4628 N, Matcher,
4629 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4630 m_Deferred(X)))) {
4631 return Matcher.getNode(ISD::ABS, DL, VT, X);
4632 }
4633
4634 // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
4635 // into and(x, mask).
4637 // We can replace vectors with '0' and '1' factors with a clearing mask.
4638 if (VT.isFixedLengthVector()) {
4639 unsigned NumElts = VT.getVectorNumElements();
4640 SmallBitVector ClearMask;
4641 ClearMask.reserve(NumElts);
4642 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4643 if (!V || V->isZero()) {
4644 ClearMask.push_back(true);
4645 return true;
4646 }
4647 ClearMask.push_back(false);
4648 return V->isOne();
4649 };
4650 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4651 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4652 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4653 EVT LegalSVT = N1.getOperand(0).getValueType();
4654 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4655 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4656 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4657 for (unsigned I = 0; I != NumElts; ++I)
4658 if (ClearMask[I])
4659 Mask[I] = Zero;
4660 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4661 }
4662 }
4663
4664 // reassociate mul
4665 // TODO: Change reassociateOps to support vp ops.
4666 if (!UseVP)
4667 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4668 return RMUL;
4669
4670 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4671 // TODO: Change reassociateReduction to support vp ops.
4672 if (!UseVP)
4673 if (SDValue SD =
4674 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4675 return SD;
4676
4677 // Simplify the operands using demanded-bits information.
4678 if (SimplifyDemandedBits(SDValue(N, 0)))
4679 return SDValue(N, 0);
4680
4681 return SDValue();
4682}
4683
4684/// Return true if divmod libcall is available.
4685 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4686 const TargetLowering &TLI) {
4687 RTLIB::Libcall LC;
4688 EVT NodeType = Node->getValueType(0);
4689 if (!NodeType.isSimple())
4690 return false;
4691 switch (NodeType.getSimpleVT().SimpleTy) {
4692 default: return false; // No libcall for vector types.
4693 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4694 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4695 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4696 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4697 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4698 }
4699
4700 return TLI.getLibcallName(LC) != nullptr;
4701}
4702
4703/// Issue divrem if both quotient and remainder are needed.
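/// For example, if both (sdiv X, Y) and (srem X, Y) appear in the DAG, a single
/// (sdivrem X, Y) node can compute both results.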
4704SDValue DAGCombiner::useDivRem(SDNode *Node) {
4705 if (Node->use_empty())
4706 return SDValue(); // This is a dead node, leave it alone.
4707
4708 unsigned Opcode = Node->getOpcode();
4709 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4710 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4711
4712 // DivMod lib calls can still work on non-legal types if using lib-calls.
4713 EVT VT = Node->getValueType(0);
4714 if (VT.isVector() || !VT.isInteger())
4715 return SDValue();
4716
4717 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4718 return SDValue();
4719
4720 // If DIVREM is going to get expanded into a libcall,
4721 // but there is no libcall available, then don't combine.
4722 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4723 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4724 return SDValue();
4725
4726 // If div is legal, it's better to do the normal expansion
4727 unsigned OtherOpcode = 0;
4728 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4729 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4730 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4731 return SDValue();
4732 } else {
4733 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4734 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4735 return SDValue();
4736 }
4737
4738 SDValue Op0 = Node->getOperand(0);
4739 SDValue Op1 = Node->getOperand(1);
4740 SDValue combined;
4741 for (SDNode *User : Op0->users()) {
4742 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4743 User->use_empty())
4744 continue;
4745 // Convert the other matching node(s), too;
4746 // otherwise, the DIVREM may get target-legalized into something
4747 // target-specific that we won't be able to recognize.
4748 unsigned UserOpc = User->getOpcode();
4749 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4750 User->getOperand(0) == Op0 &&
4751 User->getOperand(1) == Op1) {
4752 if (!combined) {
4753 if (UserOpc == OtherOpcode) {
4754 SDVTList VTs = DAG.getVTList(VT, VT);
4755 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4756 } else if (UserOpc == DivRemOpc) {
4757 combined = SDValue(User, 0);
4758 } else {
4759 assert(UserOpc == Opcode);
4760 continue;
4761 }
4762 }
4763 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4764 CombineTo(User, combined);
4765 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4766 CombineTo(User, combined.getValue(1));
4767 }
4768 }
4769 return combined;
4770}
4771
4772 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4773 SDValue N0 = N->getOperand(0);
4774 SDValue N1 = N->getOperand(1);
4775 EVT VT = N->getValueType(0);
4776 SDLoc DL(N);
4777
4778 unsigned Opc = N->getOpcode();
4779 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4780 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4781
4782 // X / undef -> undef
4783 // X % undef -> undef
4784 // X / 0 -> undef
4785 // X % 0 -> undef
4786 // NOTE: This includes vectors where any divisor element is zero/undef.
4787 if (DAG.isUndef(Opc, {N0, N1}))
4788 return DAG.getUNDEF(VT);
4789
4790 // undef / X -> 0
4791 // undef % X -> 0
4792 if (N0.isUndef())
4793 return DAG.getConstant(0, DL, VT);
4794
4795 // 0 / X -> 0
4796 // 0 % X -> 0
4797 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4798 if (N0C && N0C->isZero())
4799 return N0;
4800
4801 // X / X -> 1
4802 // X % X -> 0
4803 if (N0 == N1)
4804 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4805
4806 // X / 1 -> X
4807 // X % 1 -> 0
4808 // If this is a boolean op (single-bit element type), we can't have
4809 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4810 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4811 // it's a 1.
4812 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4813 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4814
4815 return SDValue();
4816}
4817
4818SDValue DAGCombiner::visitSDIV(SDNode *N) {
4819 SDValue N0 = N->getOperand(0);
4820 SDValue N1 = N->getOperand(1);
4821 EVT VT = N->getValueType(0);
4822 EVT CCVT = getSetCCResultType(VT);
4823 SDLoc DL(N);
4824
4825 // fold (sdiv c1, c2) -> c1/c2
4826 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4827 return C;
4828
4829 // fold vector ops
4830 if (VT.isVector())
4831 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4832 return FoldedVOp;
4833
4834 // fold (sdiv X, -1) -> 0-X
4835 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4836 if (N1C && N1C->isAllOnes())
4837 return DAG.getNegative(N0, DL, VT);
4838
4839 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4840 if (N1C && N1C->isMinSignedValue())
4841 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4842 DAG.getConstant(1, DL, VT),
4843 DAG.getConstant(0, DL, VT));
4844
4845 if (SDValue V = simplifyDivRem(N, DAG))
4846 return V;
4847
4848 if (SDValue NewSel = foldBinOpIntoSelect(N))
4849 return NewSel;
4850
4851 // If we know the sign bits of both operands are zero, strength reduce to a
4852 // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
4853 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4854 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4855
4856 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4857 // If the corresponding remainder node exists, update its users with
4858 // (Dividend - (Quotient * Divisor)).
4859 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4860 { N0, N1 })) {
4861 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4862 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4863 AddToWorklist(Mul.getNode());
4864 AddToWorklist(Sub.getNode());
4865 CombineTo(RemNode, Sub);
4866 }
4867 return V;
4868 }
4869
4870 // sdiv, srem -> sdivrem
4871 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4872 // true. Otherwise, we break the simplification logic in visitREM().
4873 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4874 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4875 if (SDValue DivRem = useDivRem(N))
4876 return DivRem;
4877
4878 return SDValue();
4879}
4880
4881static bool isDivisorPowerOfTwo(SDValue Divisor) {
4882 // Helper for determining whether a value is a power-2 constant scalar or a
4883 // vector of such elements.
4884 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4885 if (C->isZero() || C->isOpaque())
4886 return false;
4887 if (C->getAPIntValue().isPowerOf2())
4888 return true;
4889 if (C->getAPIntValue().isNegatedPowerOf2())
4890 return true;
4891 return false;
4892 };
4893
4894 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4895}
4896
4897SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4898 SDLoc DL(N);
4899 EVT VT = N->getValueType(0);
4900 EVT CCVT = getSetCCResultType(VT);
4901 unsigned BitWidth = VT.getScalarSizeInBits();
4902
4903 // fold (sdiv X, pow2) -> simple ops after legalize
4904 // FIXME: We check for the exact bit here because the generic lowering gives
4905 // better results in that case. The target-specific lowering should learn how
4906 // to handle exact sdivs efficiently.
4907 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4908 // Target-specific implementation of sdiv x, pow2.
4909 if (SDValue Res = BuildSDIVPow2(N))
4910 return Res;
4911
4912 // Create constants that are functions of the shift amount value.
4913 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4914 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4915 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4916 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4917 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4918 if (!isConstantOrConstantVector(Inexact))
4919 return SDValue();
4920
4921 // Splat the sign bit into the register
4922 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4923 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4924 AddToWorklist(Sign.getNode());
4925
4926 // Add (N0 < 0) ? abs2 - 1 : 0;
4927 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4928 AddToWorklist(Srl.getNode());
4929 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4930 AddToWorklist(Add.getNode());
4931 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4932 AddToWorklist(Sra.getNode());
4933
4934 // Special case: (sdiv X, 1) -> X
4935 // Special case: (sdiv X, -1) -> 0-X
4936 SDValue One = DAG.getConstant(1, DL, VT);
4937 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4938 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4939 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4940 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4941 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4942
4943 // If dividing by a positive value, we're done. Otherwise, the result must
4944 // be negated.
4945 SDValue Zero = DAG.getConstant(0, DL, VT);
4946 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4947
4948 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4949 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4950 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4951 return Res;
4952 }
4953
4954 // If integer divide is expensive and we satisfy the requirements, emit an
4955 // alternate sequence. Targets may check function attributes for size/speed
4956 // trade-offs.
4957 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4958 if (isConstantOrConstantVector(N1) &&
4959 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4960 if (SDValue Op = BuildSDIV(N))
4961 return Op;
4962
4963 return SDValue();
4964}
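
// Illustrative sketch, not part of this file: the same shift-based expansion
// of a signed divide by a power of two, restated on plain 32-bit integers.
// The helper name and the bounds 1 <= K <= 30 are assumptions for the sketch,
// and it relies on ">>" of a negative int being an arithmetic shift (true on
// common targets). Like sdiv, the result rounds toward zero.
static int sdivByPow2Sketch(int N, unsigned K) {
  int Sign = N >> 31;                            // splat the sign bit
  int Bias = (int)((unsigned)Sign >> (32 - K));  // (N < 0) ? (1 << K) - 1 : 0
  return (N + Bias) >> K;                        // equals N / (1 << K)
}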
4965
4966SDValue DAGCombiner::visitUDIV(SDNode *N) {
4967 SDValue N0 = N->getOperand(0);
4968 SDValue N1 = N->getOperand(1);
4969 EVT VT = N->getValueType(0);
4970 EVT CCVT = getSetCCResultType(VT);
4971 SDLoc DL(N);
4972
4973 // fold (udiv c1, c2) -> c1/c2
4974 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4975 return C;
4976
4977 // fold vector ops
4978 if (VT.isVector())
4979 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980 return FoldedVOp;
4981
4982 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4983 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4984 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4985 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4986 DAG.getConstant(1, DL, VT),
4987 DAG.getConstant(0, DL, VT));
4988 }
4989
4990 if (SDValue V = simplifyDivRem(N, DAG))
4991 return V;
4992
4993 if (SDValue NewSel = foldBinOpIntoSelect(N))
4994 return NewSel;
4995
4996 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4997 // If the corresponding remainder node exists, update its users with
4998 // (Dividend - (Quotient * Divisor)).
4999 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5000 { N0, N1 })) {
5001 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5002 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5003 AddToWorklist(Mul.getNode());
5004 AddToWorklist(Sub.getNode());
5005 CombineTo(RemNode, Sub);
5006 }
5007 return V;
5008 }
5009
5010 // udiv, urem -> udivrem
5011 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5012 // true. Otherwise, we break the simplification logic in visitREM().
5013 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5014 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5015 if (SDValue DivRem = useDivRem(N))
5016 return DivRem;
5017
5018 // Simplify the operands using demanded-bits information.
5019 // We don't have demanded bits support for UDIV so this just enables constant
5020 // folding based on known bits.
5021 if (SimplifyDemandedBits(SDValue(N, 0)))
5022 return SDValue(N, 0);
5023
5024 return SDValue();
5025}
5026
5027SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5028 SDLoc DL(N);
5029 EVT VT = N->getValueType(0);
5030
5031 // fold (udiv x, (1 << c)) -> x >>u c
5032 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5033 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5034 AddToWorklist(LogBase2.getNode());
5035
5036 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5037 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5038 AddToWorklist(Trunc.getNode());
5039 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5040 }
5041 }
5042
5043 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5044 if (N1.getOpcode() == ISD::SHL) {
5045 SDValue N10 = N1.getOperand(0);
5046 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5047 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5048 AddToWorklist(LogBase2.getNode());
5049
5050 EVT ADDVT = N1.getOperand(1).getValueType();
5051 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5052 AddToWorklist(Trunc.getNode());
5053 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5054 AddToWorklist(Add.getNode());
5055 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5056 }
5057 }
5058 }
5059
5060 // fold (udiv x, c) -> alternate
5061 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5062 if (isConstantOrConstantVector(N1) &&
5063 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5064 if (SDValue Op = BuildUDIV(N))
5065 return Op;
5066
5067 return SDValue();
5068}
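
// Illustrative sketch, not part of this file: the two unsigned-divide folds
// above, restated on plain 32-bit integers. Names are hypothetical; the
// divisor is assumed to be a power of two and Log2C + Y is assumed <= 31.
static unsigned udivByPow2Sketch(unsigned X, unsigned Log2C) {
  return X >> Log2C;                   // udiv x, (1 << c)  ->  x >>u c
}
static unsigned udivByShiftedPow2Sketch(unsigned X, unsigned Log2C, unsigned Y) {
  return X >> (Log2C + Y);             // udiv x, (c << y)  ->  x >>u (log2(c) + y)
}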
5069
5070SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5071 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5072 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5073 // Target-specific implementation of srem x, pow2.
5074 if (SDValue Res = BuildSREMPow2(N))
5075 return Res;
5076 }
5077 return SDValue();
5078}
5079
5080// handles ISD::SREM and ISD::UREM
5081SDValue DAGCombiner::visitREM(SDNode *N) {
5082 unsigned Opcode = N->getOpcode();
5083 SDValue N0 = N->getOperand(0);
5084 SDValue N1 = N->getOperand(1);
5085 EVT VT = N->getValueType(0);
5086 EVT CCVT = getSetCCResultType(VT);
5087
5088 bool isSigned = (Opcode == ISD::SREM);
5089 SDLoc DL(N);
5090
5091 // fold (rem c1, c2) -> c1%c2
5092 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5093 return C;
5094
5095 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5096 // Freeze the numerator to avoid a miscompile with an undefined value.
5097 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5098 CCVT.isVector() == VT.isVector()) {
5099 SDValue F0 = DAG.getFreeze(N0);
5100 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5101 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5102 }
5103
5104 if (SDValue V = simplifyDivRem(N, DAG))
5105 return V;
5106
5107 if (SDValue NewSel = foldBinOpIntoSelect(N))
5108 return NewSel;
5109
5110 if (isSigned) {
5111 // If we know the sign bits of both operands are zero, strength reduce to a
5112 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5113 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5114 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5115 } else {
5116 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5117 // fold (urem x, pow2) -> (and x, pow2-1)
5118 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5119 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5120 AddToWorklist(Add.getNode());
5121 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5122 }
5123 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5124 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5125 // TODO: We should sink the following into isKnownToBePowerOfTwo
5126 // using a OrZero parameter analogous to our handling in ValueTracking.
5127 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5128 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5129 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5130 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5131 AddToWorklist(Add.getNode());
5132 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5133 }
5134 }
5135
5135
5136 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5137
5138 // If X/C can be simplified by the division-by-constant logic, lower
5139 // X%C to the equivalent of X-X/C*C.
5140 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5141 // speculative DIV must not cause a DIVREM conversion. We guard against this
5142 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5143 // combine will not return a DIVREM. Regardless, checking cheapness here
5144 // makes sense since the simplification results in fatter code.
5145 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5146 if (isSigned) {
5147 // check if we can build faster implementation for srem
5148 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5149 return OptimizedRem;
5150 }
5151
5152 SDValue OptimizedDiv =
5153 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5154 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5155 // If the equivalent Div node also exists, update its users.
5156 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5157 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5158 { N0, N1 }))
5159 CombineTo(DivNode, OptimizedDiv);
5160 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5161 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5162 AddToWorklist(OptimizedDiv.getNode());
5163 AddToWorklist(Mul.getNode());
5164 return Sub;
5165 }
5166 }
5167
5168 // sdiv, srem -> sdivrem
5169 if (SDValue DivRem = useDivRem(N))
5170 return DivRem.getValue(1);
5171
5172 return SDValue();
5173}
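
// Illustrative sketch, not part of this file: the two remainder rewrites used
// above, restated on plain unsigned 32-bit integers. Names are hypothetical;
// Pow2 is assumed to be a power of two and C to be nonzero.
static unsigned uremByPow2Sketch(unsigned X, unsigned Pow2) {
  return X & (Pow2 - 1);     // urem x, pow2  ->  and x, pow2-1
}
static unsigned remViaDivSketch(unsigned X, unsigned C) {
  unsigned Q = X / C;        // stands in for the already-optimized quotient
  return X - Q * C;          // X % C  ==  X - (X / C) * C
}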
5174
5175SDValue DAGCombiner::visitMULHS(SDNode *N) {
5176 SDValue N0 = N->getOperand(0);
5177 SDValue N1 = N->getOperand(1);
5178 EVT VT = N->getValueType(0);
5179 SDLoc DL(N);
5180
5181 // fold (mulhs c1, c2)
5182 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5183 return C;
5184
5185 // canonicalize constant to RHS.
5188 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5189
5190 if (VT.isVector()) {
5191 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5192 return FoldedVOp;
5193
5194 // fold (mulhs x, 0) -> 0
5195 // do not return N1, because undef node may exist.
5197 return DAG.getConstant(0, DL, VT);
5198 }
5199
5200 // fold (mulhs x, 0) -> 0
5201 if (isNullConstant(N1))
5202 return N1;
5203
5204 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5205 if (isOneConstant(N1))
5206 return DAG.getNode(
5207 ISD::SRA, DL, VT, N0,
5208 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
5209
5210 // fold (mulhs x, undef) -> 0
5211 if (N0.isUndef() || N1.isUndef())
5212 return DAG.getConstant(0, DL, VT);
5213
5214 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5215 // plus a shift.
5216 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5217 !VT.isVector()) {
5218 MVT Simple = VT.getSimpleVT();
5219 unsigned SimpleSize = Simple.getSizeInBits();
5220 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5221 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5222 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5223 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5224 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5225 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5226 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5227 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5228 }
5229 }
5230
5231 return SDValue();
5232}
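
// Illustrative sketch, not part of this file: the "widen, multiply, shift"
// expansion of MULHS above, restated for 32-bit operands with a 64-bit
// multiply. The helper name is hypothetical.
static int mulhsSketch(int A, int B) {
  long long Wide = (long long)A * (long long)B;  // sign-extend and multiply
  return (int)(Wide >> 32);                      // keep only the high half
}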
5233
5234SDValue DAGCombiner::visitMULHU(SDNode *N) {
5235 SDValue N0 = N->getOperand(0);
5236 SDValue N1 = N->getOperand(1);
5237 EVT VT = N->getValueType(0);
5238 SDLoc DL(N);
5239
5240 // fold (mulhu c1, c2)
5241 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5242 return C;
5243
5244 // canonicalize constant to RHS.
5247 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5248
5249 if (VT.isVector()) {
5250 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5251 return FoldedVOp;
5252
5253 // fold (mulhu x, 0) -> 0
5254 // do not return N1, because undef node may exist.
5256 return DAG.getConstant(0, DL, VT);
5257 }
5258
5259 // fold (mulhu x, 0) -> 0
5260 if (isNullConstant(N1))
5261 return N1;
5262
5263 // fold (mulhu x, 1) -> 0
5264 if (isOneConstant(N1))
5265 return DAG.getConstant(0, DL, VT);
5266
5267 // fold (mulhu x, undef) -> 0
5268 if (N0.isUndef() || N1.isUndef())
5269 return DAG.getConstant(0, DL, VT);
5270
5271 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5272 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5273 hasOperation(ISD::SRL, VT)) {
5274 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5275 unsigned NumEltBits = VT.getScalarSizeInBits();
5276 SDValue SRLAmt = DAG.getNode(
5277 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5278 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5279 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5280 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5281 }
5282 }
5283
5284 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5285 // plus a shift.
5286 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5287 !VT.isVector()) {
5288 MVT Simple = VT.getSimpleVT();
5289 unsigned SimpleSize = Simple.getSizeInBits();
5290 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5291 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5292 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5293 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5294 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5295 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5296 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5297 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5298 }
5299 }
5300
5301 // Simplify the operands using demanded-bits information.
5302 // We don't have demanded bits support for MULHU so this just enables constant
5303 // folding based on known bits.
5304 if (SimplifyDemandedBits(SDValue(N, 0)))
5305 return SDValue(N, 0);
5306
5307 return SDValue();
5308}
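
// Illustrative sketch, not part of this file: the MULHU power-of-two fold
// above, checked against a 64-bit reference. Names are hypothetical and C is
// assumed to be in [1, 31] so the shift amounts stay in range.
static unsigned mulhuByPow2Sketch(unsigned X, unsigned C) {
  // Reference: the true high half of the 64-bit product X * (1 << C).
  unsigned Ref = (unsigned)(((unsigned long long)X * (1ull << C)) >> 32);
  unsigned Folded = X >> (32 - C);   // mulhu x, (1 << c)  ->  x >>u (32 - c)
  (void)Ref;                         // Folded == Ref for every X when 1 <= C <= 31
  return Folded;
}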
5309
5310SDValue DAGCombiner::visitAVG(SDNode *N) {
5311 unsigned Opcode = N->getOpcode();
5312 SDValue N0 = N->getOperand(0);
5313 SDValue N1 = N->getOperand(1);
5314 EVT VT = N->getValueType(0);
5315 SDLoc DL(N);
5316 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5317
5318 // fold (avg c1, c2)
5319 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5320 return C;
5321
5322 // canonicalize constant to RHS.
5325 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5326
5327 if (VT.isVector())
5328 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5329 return FoldedVOp;
5330
5331 // fold (avg x, undef) -> x
5332 if (N0.isUndef())
5333 return N1;
5334 if (N1.isUndef())
5335 return N0;
5336
5337 // fold (avg x, x) --> x
5338 if (N0 == N1 && Level >= AfterLegalizeTypes)
5339 return N0;
5340
5341 // fold (avgfloor x, 0) -> x >> 1
5342 SDValue X, Y;
5344 return DAG.getNode(ISD::SRA, DL, VT, X,
5345 DAG.getShiftAmountConstant(1, VT, DL));
5347 return DAG.getNode(ISD::SRL, DL, VT, X,
5348 DAG.getShiftAmountConstant(1, VT, DL));
5349
5350 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5351 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5352 if (!IsSigned &&
5353 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5354 X.getValueType() == Y.getValueType() &&
5355 hasOperation(Opcode, X.getValueType())) {
5356 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5357 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5358 }
5359 if (IsSigned &&
5360 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5361 X.getValueType() == Y.getValueType() &&
5362 hasOperation(Opcode, X.getValueType())) {
5363 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5364 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5365 }
5366
5367 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5368 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5369 // Check if avgflooru isn't legal/custom but avgceilu is.
5370 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5371 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5372 if (DAG.isKnownNeverZero(N1))
5373 return DAG.getNode(
5374 ISD::AVGCEILU, DL, VT, N0,
5375 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5376 if (DAG.isKnownNeverZero(N0))
5377 return DAG.getNode(
5378 ISD::AVGCEILU, DL, VT, N1,
5379 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5380 }
5381
5382 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5383 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5384 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5385 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5386 SDValue Add;
5387 if (sd_match(N,
5388 m_c_BinOp(Opcode,
5390 m_One())) ||
5391 sd_match(N, m_c_BinOp(Opcode,
5393 m_Value(Y)))) {
5394
5395 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5396 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5397
5398 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5399 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5400 }
5401 }
5402
5403 return SDValue();
5404}
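
// Illustrative sketch, not part of this file: overflow-free fixed-width
// averages and the AVGFLOORU -> AVGCEILU rewrite used above, restated on
// plain 32-bit unsigned integers. Names are hypothetical.
static unsigned avgFlooruSketch(unsigned X, unsigned Y) {
  return (X & Y) + ((X ^ Y) >> 1);   // floor((X + Y) / 2) without overflow
}
static unsigned avgCeiluSketch(unsigned X, unsigned Y) {
  return (X | Y) - ((X ^ Y) >> 1);   // ceil((X + Y) / 2) without overflow
}
static unsigned avgFlooruViaCeilSketch(unsigned X, unsigned Y) {
  // Valid only when Y != 0: avgflooru(x, y) == avgceilu(x, y - 1).
  return avgCeiluSketch(X, Y - 1);
}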
5405
5406SDValue DAGCombiner::visitABD(SDNode *N) {
5407 unsigned Opcode = N->getOpcode();
5408 SDValue N0 = N->getOperand(0);
5409 SDValue N1 = N->getOperand(1);
5410 EVT VT = N->getValueType(0);
5411 SDLoc DL(N);
5412
5413 // fold (abd c1, c2)
5414 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5415 return C;
5416
5417 // canonicalize constant to RHS.
5420 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5421
5422 if (VT.isVector())
5423 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5424 return FoldedVOp;
5425
5426 // fold (abd x, undef) -> 0
5427 if (N0.isUndef() || N1.isUndef())
5428 return DAG.getConstant(0, DL, VT);
5429
5430 // fold (abd x, x) -> 0
5431 if (N0 == N1)
5432 return DAG.getConstant(0, DL, VT);
5433
5434 SDValue X;
5435
5436 // fold (abds x, 0) -> abs x
5438 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5439 return DAG.getNode(ISD::ABS, DL, VT, X);
5440
5441 // fold (abdu x, 0) -> x
5443 return X;
5444
5445 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5446 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5447 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5448 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5449
5450 return SDValue();
5451}
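
// Illustrative sketch, not part of this file: the unsigned absolute-difference
// identity behind the folds above, restated on plain 32-bit integers. The
// name is hypothetical; abds(x, 0) similarly reduces to abs(x), and when both
// sign bits are known zero the signed and unsigned forms agree.
static unsigned abduSketch(unsigned X, unsigned Y) {
  return X > Y ? X - Y : Y - X;   // abdu(x, y) == max(x, y) - min(x, y)
}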
5452
5453/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5454/// give the opcodes for the two computations that are being performed. Return
5455 /// the combined value if a simplification was made, or a null SDValue otherwise.
5456SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5457 unsigned HiOp) {
5458 // If the high half is not needed, just compute the low half.
5459 bool HiExists = N->hasAnyUseOfValue(1);
5460 if (!HiExists && (!LegalOperations ||
5461 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5462 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5463 return CombineTo(N, Res, Res);
5464 }
5465
5466 // If the low half is not needed, just compute the high half.
5467 bool LoExists = N->hasAnyUseOfValue(0);
5468 if (!LoExists && (!LegalOperations ||
5469 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5470 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5471 return CombineTo(N, Res, Res);
5472 }
5473
5474 // If both halves are used, return as it is.
5475 if (LoExists && HiExists)
5476 return SDValue();
5477
5478 // If the two computed results can be simplified separately, separate them.
5479 if (LoExists) {
5480 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5481 AddToWorklist(Lo.getNode());
5482 SDValue LoOpt = combine(Lo.getNode());
5483 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5484 (!LegalOperations ||
5485 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5486 return CombineTo(N, LoOpt, LoOpt);
5487 }
5488
5489 if (HiExists) {
5490 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5491 AddToWorklist(Hi.getNode());
5492 SDValue HiOpt = combine(Hi.getNode());
5493 if (HiOpt.getNode() && HiOpt != Hi &&
5494 (!LegalOperations ||
5495 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5496 return CombineTo(N, HiOpt, HiOpt);
5497 }
5498
5499 return SDValue();
5500}
5501
5502SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5503 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5504 return Res;
5505
5506 SDValue N0 = N->getOperand(0);
5507 SDValue N1 = N->getOperand(1);
5508 EVT VT = N->getValueType(0);
5509 SDLoc DL(N);
5510
5511 // Constant fold.
5512 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5513 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5514
5515 // canonicalize constant to RHS (vector doesn't have to splat)
5518 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5519
5520 // If the type twice as wide is legal, transform this into a wider multiply
5521 // plus a shift.
5522 if (VT.isSimple() && !VT.isVector()) {
5523 MVT Simple = VT.getSimpleVT();
5524 unsigned SimpleSize = Simple.getSizeInBits();
5525 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5526 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5527 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5528 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5529 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5530 // Compute the high part as N1.
5531 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5532 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5534 // Compute the low part as N0.
5535 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5536 return CombineTo(N, Lo, Hi);
5537 }
5538 }
5539
5540 return SDValue();
5541}
5542
5543SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5544 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5545 return Res;
5546
5547 SDValue N0 = N->getOperand(0);
5548 SDValue N1 = N->getOperand(1);
5549 EVT VT = N->getValueType(0);
5550 SDLoc DL(N);
5551
5552 // Constant fold.
5553 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5554 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5555
5556 // canonicalize constant to RHS (vector doesn't have to splat)
5559 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5560
5561 // (umul_lohi N0, 0) -> (0, 0)
5562 if (isNullConstant(N1)) {
5563 SDValue Zero = DAG.getConstant(0, DL, VT);
5564 return CombineTo(N, Zero, Zero);
5565 }
5566
5567 // (umul_lohi N0, 1) -> (N0, 0)
5568 if (isOneConstant(N1)) {
5569 SDValue Zero = DAG.getConstant(0, DL, VT);
5570 return CombineTo(N, N0, Zero);
5571 }
5572
5573 // If the type twice as wide is legal, transform this into a wider multiply
5574 // plus a shift.
5575 if (VT.isSimple() && !VT.isVector()) {
5576 MVT Simple = VT.getSimpleVT();
5577 unsigned SimpleSize = Simple.getSizeInBits();
5578 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5579 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5580 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5581 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5582 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5583 // Compute the high part as N1.
5584 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5585 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5586 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5587 // Compute the low part as N0.
5588 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5589 return CombineTo(N, Lo, Hi);
5590 }
5591 }
5592
5593 return SDValue();
5594}
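
// Illustrative sketch, not part of this file: producing both UMUL_LOHI
// results from one widened multiply, as the transform above does for a type
// twice as wide. Names are hypothetical; 32-bit operands, 64-bit product.
static void umulLoHiSketch(unsigned A, unsigned B, unsigned &Lo, unsigned &Hi) {
  unsigned long long Wide = (unsigned long long)A * B; // zero-extend, multiply
  Lo = (unsigned)Wide;          // low half: truncate the product
  Hi = (unsigned)(Wide >> 32);  // high half: shift, then truncate
}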
5595
5596SDValue DAGCombiner::visitMULO(SDNode *N) {
5597 SDValue N0 = N->getOperand(0);
5598 SDValue N1 = N->getOperand(1);
5599 EVT VT = N0.getValueType();
5600 bool IsSigned = (ISD::SMULO == N->getOpcode());
5601
5602 EVT CarryVT = N->getValueType(1);
5603 SDLoc DL(N);
5604
5605 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5606 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5607
5608 // fold operation with constant operands.
5609 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5610 // multiple results.
5611 if (N0C && N1C) {
5612 bool Overflow;
5613 APInt Result =
5614 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5615 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5616 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5617 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5618 }
5619
5620 // canonicalize constant to RHS.
5623 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5624
5625 // fold (mulo x, 0) -> 0 + no carry out
5626 if (isNullOrNullSplat(N1))
5627 return CombineTo(N, DAG.getConstant(0, DL, VT),
5628 DAG.getConstant(0, DL, CarryVT));
5629
5630 // (mulo x, 2) -> (addo x, x)
5631 // FIXME: This needs a freeze.
5632 if (N1C && N1C->getAPIntValue() == 2 &&
5633 (!IsSigned || VT.getScalarSizeInBits() > 2))
5634 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5635 N->getVTList(), N0, N0);
5636
5637 // A 1 bit SMULO overflows if both inputs are 1.
5638 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5639 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5640 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5641 DAG.getConstant(0, DL, VT), ISD::SETNE);
5642 return CombineTo(N, And, Cmp);
5643 }
5644
5645 // If it cannot overflow, transform into a mul.
5646 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5647 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5648 DAG.getConstant(0, DL, CarryVT));
5649 return SDValue();
5650}
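
// Illustrative sketch, not part of this file: an unsigned multiply-with-
// overflow check done through a widened product, mirroring the constant and
// "cannot overflow -> plain mul" handling above. Names are hypothetical.
static bool umuloSketch(unsigned A, unsigned B, unsigned &Result) {
  unsigned long long Wide = (unsigned long long)A * B;
  Result = (unsigned)Wide;             // the MUL value
  return (Wide >> 32) != 0;            // the carry/overflow flag
}
// The "(mulo x, 2) -> (addo x, x)" fold relies on x * 2 and x + x setting the
// same overflow condition for both the signed and the unsigned variants.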
5651
5652// Function to calculate whether the Min/Max pair of SDNodes (potentially
5653// swapped around) make a signed saturate pattern, clamping to between a signed
5654 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5655// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5656// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5657// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5658 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5659 SDValue N3, ISD::CondCode CC, unsigned &BW,
5660 bool &Unsigned, SelectionDAG &DAG) {
5661 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5662 ISD::CondCode CC) {
5663 // The compare and select operand should be the same or the select operands
5664 // should be truncated versions of the comparison.
5665 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5666 return 0;
5667 // The constants need to be the same or a truncated version of each other.
5668 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5669 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5670 if (!N1C || !N3C)
5671 return 0;
5672 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5673 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5674 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5675 return 0;
5676 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5677 };
5678
5679 // Check the initial value is a SMIN/SMAX equivalent.
5680 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5681 if (!Opcode0)
5682 return SDValue();
5683
5684 // We could only need one range check, if the fptosi could never produce
5685 // the upper value.
5686 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5687 if (isNullOrNullSplat(N3)) {
5688 EVT IntVT = N0.getValueType().getScalarType();
5689 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5690 if (FPVT.isSimple()) {
5691 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5692 const fltSemantics &Semantics = InputTy->getFltSemantics();
5693 uint32_t MinBitWidth =
5694 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5695 if (IntVT.getSizeInBits() >= MinBitWidth) {
5696 Unsigned = true;
5697 BW = PowerOf2Ceil(MinBitWidth);
5698 return N0;
5699 }
5700 }
5701 }
5702 }
5703
5704 SDValue N00, N01, N02, N03;
5705 ISD::CondCode N0CC;
5706 switch (N0.getOpcode()) {
5707 case ISD::SMIN:
5708 case ISD::SMAX:
5709 N00 = N02 = N0.getOperand(0);
5710 N01 = N03 = N0.getOperand(1);
5711 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5712 break;
5713 case ISD::SELECT_CC:
5714 N00 = N0.getOperand(0);
5715 N01 = N0.getOperand(1);
5716 N02 = N0.getOperand(2);
5717 N03 = N0.getOperand(3);
5718 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5719 break;
5720 case ISD::SELECT:
5721 case ISD::VSELECT:
5722 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5723 return SDValue();
5724 N00 = N0.getOperand(0).getOperand(0);
5725 N01 = N0.getOperand(0).getOperand(1);
5726 N02 = N0.getOperand(1);
5727 N03 = N0.getOperand(2);
5728 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5729 break;
5730 default:
5731 return SDValue();
5732 }
5733
5734 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5735 if (!Opcode1 || Opcode0 == Opcode1)
5736 return SDValue();
5737
5738 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5739 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5740 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5741 return SDValue();
5742
5743 const APInt &MinC = MinCOp->getAPIntValue();
5744 const APInt &MaxC = MaxCOp->getAPIntValue();
5745 APInt MinCPlus1 = MinC + 1;
5746 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5747 BW = MinCPlus1.exactLogBase2() + 1;
5748 Unsigned = false;
5749 return N02;
5750 }
5751
5752 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5753 BW = MinCPlus1.exactLogBase2();
5754 Unsigned = true;
5755 return N02;
5756 }
5757
5758 return SDValue();
5759}
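
// Illustrative sketch, not part of this file: the clamp shape recognized
// above and how the saturation bit width falls out of the two constants,
// shown for a signed 8-bit saturate. Names are hypothetical.
static int signedSat8Sketch(int X) {
  const int MinC = 127;   // constant on the SMIN side (upper bound)
  const int MaxC = -128;  // constant on the SMAX side (lower bound)
  // -MaxC == MinC + 1 == 128 is a power of two, so BW = log2(128) + 1 = 8.
  int Clamped = X < MaxC ? MaxC : X;       // smax(x, -128)
  return Clamped > MinC ? MinC : Clamped;  // smin(.., 127)
}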
5760
5761 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5762 SDValue N3, ISD::CondCode CC,
5763 SelectionDAG &DAG) {
5764 unsigned BW;
5765 bool Unsigned;
5766 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5767 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5768 return SDValue();
5769 EVT FPVT = Fp.getOperand(0).getValueType();
5770 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5771 if (FPVT.isVector())
5772 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5773 FPVT.getVectorElementCount());
5774 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5775 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5776 return SDValue();
5777 SDLoc DL(Fp);
5778 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5779 DAG.getValueType(NewVT.getScalarType()));
5780 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5781}
5782
5783 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5784 SDValue N3, ISD::CondCode CC,
5785 SelectionDAG &DAG) {
5786 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5787 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5788 // be truncated versions of the setcc (N0/N1).
5789 if ((N0 != N2 &&
5790 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5791 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5792 return SDValue();
5793 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5794 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5795 if (!N1C || !N3C)
5796 return SDValue();
5797 const APInt &C1 = N1C->getAPIntValue();
5798 const APInt &C3 = N3C->getAPIntValue();
5799 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5800 C1 != C3.zext(C1.getBitWidth()))
5801 return SDValue();
5802
5803 unsigned BW = (C1 + 1).exactLogBase2();
5804 EVT FPVT = N0.getOperand(0).getValueType();
5805 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5806 if (FPVT.isVector())
5807 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5808 FPVT.getVectorElementCount());
5809 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5810 FPVT, NewVT))
5811 return SDValue();
5812
5813 SDValue Sat =
5814 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5815 DAG.getValueType(NewVT.getScalarType()));
5816 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5817}
5818
5819SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5820 SDValue N0 = N->getOperand(0);
5821 SDValue N1 = N->getOperand(1);
5822 EVT VT = N0.getValueType();
5823 unsigned Opcode = N->getOpcode();
5824 SDLoc DL(N);
5825
5826 // fold operation with constant operands.
5827 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5828 return C;
5829
5830 // If the operands are the same, this is a no-op.
5831 if (N0 == N1)
5832 return N0;
5833
5834 // canonicalize constant to RHS
5837 return DAG.getNode(Opcode, DL, VT, N1, N0);
5838
5839 // fold vector ops
5840 if (VT.isVector())
5841 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5842 return FoldedVOp;
5843
5844 // reassociate minmax
5845 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5846 return RMINMAX;
5847
5848 // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5849 // Only do this if:
5850 // 1. The current op isn't legal and the flipped is.
5851 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5852 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5853 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5854 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5855 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5856 unsigned AltOpcode;
5857 switch (Opcode) {
5858 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5859 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5860 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5861 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5862 default: llvm_unreachable("Unknown MINMAX opcode");
5863 }
5864 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5865 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5866 }
5867
5868 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5869 if (SDValue S = PerformMinMaxFpToSatCombine(
5870 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5871 return S;
5872 if (Opcode == ISD::UMIN)
5873 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5874 return S;
5875
5876 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5877 auto ReductionOpcode = [](unsigned Opcode) {
5878 switch (Opcode) {
5879 case ISD::SMIN:
5880 return ISD::VECREDUCE_SMIN;
5881 case ISD::SMAX:
5882 return ISD::VECREDUCE_SMAX;
5883 case ISD::UMIN:
5884 return ISD::VECREDUCE_UMIN;
5885 case ISD::UMAX:
5886 return ISD::VECREDUCE_UMAX;
5887 default:
5888 llvm_unreachable("Unexpected opcode");
5889 }
5890 };
5891 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5892 SDLoc(N), VT, N0, N1))
5893 return SD;
5894
5895 // Simplify the operands using demanded-bits information.
5896 if (SimplifyDemandedBits(SDValue(N, 0)))
5897 return SDValue(N, 0);
5898
5899 return SDValue();
5900}
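
// Illustrative sketch, not part of this file: when both operands have a zero
// sign bit, signed and unsigned min/max pick the same operand, which is what
// the UMIN/UMAX <-> SMIN/SMAX flip above relies on. Name is hypothetical.
static bool minMaxFlipAgreesSketch(unsigned X, unsigned Y) {
  // Assumes X, Y < 0x80000000 (sign bits zero).
  unsigned UMin = X < Y ? X : Y;                  // umin
  int SMin = (int)X < (int)Y ? (int)X : (int)Y;   // smin on the same bits
  return UMin == (unsigned)SMin;                  // always true under the assumption
}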
5901
5902/// If this is a bitwise logic instruction and both operands have the same
5903/// opcode, try to sink the other opcode after the logic instruction.
5904SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5905 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5906 EVT VT = N0.getValueType();
5907 unsigned LogicOpcode = N->getOpcode();
5908 unsigned HandOpcode = N0.getOpcode();
5909 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5910 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5911
5912 // Bail early if none of these transforms apply.
5913 if (N0.getNumOperands() == 0)
5914 return SDValue();
5915
5916 // FIXME: We should check number of uses of the operands to not increase
5917 // the instruction count for all transforms.
5918
5919 // Handle size-changing casts (or sign_extend_inreg).
5920 SDValue X = N0.getOperand(0);
5921 SDValue Y = N1.getOperand(0);
5922 EVT XVT = X.getValueType();
5923 SDLoc DL(N);
5924 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5925 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5926 N0.getOperand(1) == N1.getOperand(1))) {
5927 // If both operands have other uses, this transform would create extra
5928 // instructions without eliminating anything.
5929 if (!N0.hasOneUse() && !N1.hasOneUse())
5930 return SDValue();
5931 // We need matching integer source types.
5932 if (XVT != Y.getValueType())
5933 return SDValue();
5934 // Don't create an illegal op during or after legalization. Don't ever
5935 // create an unsupported vector op.
5936 if ((VT.isVector() || LegalOperations) &&
5937 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5938 return SDValue();
5939 // Avoid infinite looping with PromoteIntBinOp.
5940 // TODO: Should we apply desirable/legal constraints to all opcodes?
5941 if ((HandOpcode == ISD::ANY_EXTEND ||
5942 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5943 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5944 return SDValue();
5945 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5946 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5947 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5948 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5949 return DAG.getNode(HandOpcode, DL, VT, Logic);
5950 }
5951
5952 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5953 if (HandOpcode == ISD::TRUNCATE) {
5954 // If both operands have other uses, this transform would create extra
5955 // instructions without eliminating anything.
5956 if (!N0.hasOneUse() && !N1.hasOneUse())
5957 return SDValue();
5958 // We need matching source types.
5959 if (XVT != Y.getValueType())
5960 return SDValue();
5961 // Don't create an illegal op during or after legalization.
5962 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5963 return SDValue();
5964 // Be extra careful sinking truncate. If it's free, there's no benefit in
5965 // widening a binop. Also, don't create a logic op on an illegal type.
5966 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5967 return SDValue();
5968 if (!TLI.isTypeLegal(XVT))
5969 return SDValue();
5970 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5971 return DAG.getNode(HandOpcode, DL, VT, Logic);
5972 }
5973
5974 // For binops SHL/SRL/SRA/AND:
5975 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5976 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5977 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5978 N0.getOperand(1) == N1.getOperand(1)) {
5979 // If either operand has other uses, this transform is not an improvement.
5980 if (!N0.hasOneUse() || !N1.hasOneUse())
5981 return SDValue();
5982 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5983 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5984 }
5985
5986 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5987 if (HandOpcode == ISD::BSWAP) {
5988 // If either operand has other uses, this transform is not an improvement.
5989 if (!N0.hasOneUse() || !N1.hasOneUse())
5990 return SDValue();
5991 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5992 return DAG.getNode(HandOpcode, DL, VT, Logic);
5993 }
5994
5995 // For funnel shifts FSHL/FSHR:
5996 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5997 // --> OP (logic_op x, y), (logic_op x1, y1), s
5998 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5999 N0.getOperand(2) == N1.getOperand(2)) {
6000 if (!N0.hasOneUse() || !N1.hasOneUse())
6001 return SDValue();
6002 SDValue X1 = N0.getOperand(1);
6003 SDValue Y1 = N1.getOperand(1);
6004 SDValue S = N0.getOperand(2);
6005 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6006 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6007 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6008 }
6009
6010 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6011 // Only perform this optimization up until type legalization, before
6012 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6013 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6014 // we don't want to undo this promotion.
6015 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6016 // on scalars.
6017 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6018 Level <= AfterLegalizeTypes) {
6019 // Input types must be integer and the same.
6020 if (XVT.isInteger() && XVT == Y.getValueType() &&
6021 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6022 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6023 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6024 return DAG.getNode(HandOpcode, DL, VT, Logic);
6025 }
6026 }
6027
6028 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6029 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6030 // If both shuffles use the same mask, and both shuffle within a single
6031 // vector, then it is worthwhile to move the swizzle after the operation.
6032 // The type-legalizer generates this pattern when loading illegal
6033 // vector types from memory. In many cases this allows additional shuffle
6034 // optimizations.
6035 // There are other cases where moving the shuffle after the xor/and/or
6036 // is profitable even if shuffles don't perform a swizzle.
6037 // If both shuffles use the same mask, and both shuffles have the same first
6038 // or second operand, then it might still be profitable to move the shuffle
6039 // after the xor/and/or operation.
6040 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6041 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6042 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6043 assert(X.getValueType() == Y.getValueType() &&
6044 "Inputs to shuffles are not the same type");
6045
6046 // Check that both shuffles use the same mask. The masks are known to be of
6047 // the same length because the result vector type is the same.
6048 // Check also that shuffles have only one use to avoid introducing extra
6049 // instructions.
6050 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051 !SVN0->getMask().equals(SVN1->getMask()))
6052 return SDValue();
6053
6054 // Don't try to fold this node if it requires introducing a
6055 // build vector of all zeros that might be illegal at this stage.
6056 SDValue ShOp = N0.getOperand(1);
6057 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6058 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6059
6060 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6061 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6062 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6063 N0.getOperand(0), N1.getOperand(0));
6064 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6065 }
6066
6067 // Don't try to fold this node if it requires introducing a
6068 // build vector of all zeros that might be illegal at this stage.
6069 ShOp = N0.getOperand(0);
6070 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6071 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6072
6073 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6074 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6075 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6076 N1.getOperand(1));
6077 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6078 }
6079 }
6080
6081 return SDValue();
6082}
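
// Illustrative sketch, not part of this file: bitwise logic commutes with the
// hand operations hoisted above; shown here for truncation and for a shared
// shift amount on plain integers. Names are hypothetical; S is assumed < 32.
static unsigned short truncThenAndSketch(unsigned X, unsigned Y) {
  // and (trunc x), (trunc y)  ==  trunc (and x, y)
  return (unsigned short)X & (unsigned short)Y;   // == (unsigned short)(X & Y)
}
static unsigned shlThenOrSketch(unsigned X, unsigned Y, unsigned S) {
  // or (shl x, s), (shl y, s)  ==  shl (or x, y), s
  return (X << S) | (Y << S);                     // == (X | Y) << S
}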
6083
6084/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6085SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6086 const SDLoc &DL) {
6087 SDValue LL, LR, RL, RR, N0CC, N1CC;
6088 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6089 !isSetCCEquivalent(N1, RL, RR, N1CC))
6090 return SDValue();
6091
6092 assert(N0.getValueType() == N1.getValueType() &&
6093 "Unexpected operand types for bitwise logic op");
6094 assert(LL.getValueType() == LR.getValueType() &&
6095 RL.getValueType() == RR.getValueType() &&
6096 "Unexpected operand types for setcc");
6097
6098 // If we're here post-legalization or the logic op type is not i1, the logic
6099 // op type must match a setcc result type. Also, all folds require new
6100 // operations on the left and right operands, so those types must match.
6101 EVT VT = N0.getValueType();
6102 EVT OpVT = LL.getValueType();
6103 if (LegalOperations || VT.getScalarType() != MVT::i1)
6104 if (VT != getSetCCResultType(OpVT))
6105 return SDValue();
6106 if (OpVT != RL.getValueType())
6107 return SDValue();
6108
6109 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6110 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6111 bool IsInteger = OpVT.isInteger();
6112 if (LR == RR && CC0 == CC1 && IsInteger) {
6113 bool IsZero = isNullOrNullSplat(LR);
6114 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6115
6116 // All bits clear?
6117 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6118 // All sign bits clear?
6119 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6120 // Any bits set?
6121 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6122 // Any sign bits set?
6123 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6124
6125 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6126 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6127 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6128 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6129 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6130 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6131 AddToWorklist(Or.getNode());
6132 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6133 }
6134
6135 // All bits set?
6136 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6137 // All sign bits set?
6138 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6139 // Any bits clear?
6140 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6141 // Any sign bits clear?
6142 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6143
6144 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6145 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6146 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6147 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6148 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6149 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6150 AddToWorklist(And.getNode());
6151 return DAG.getSetCC(DL, VT, And, LR, CC1);
6152 }
6153 }
6154
6155 // TODO: What is the 'or' equivalent of this fold?
6156 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6157 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6158 IsInteger && CC0 == ISD::SETNE &&
6159 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6160 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6161 SDValue One = DAG.getConstant(1, DL, OpVT);
6162 SDValue Two = DAG.getConstant(2, DL, OpVT);
6163 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6164 AddToWorklist(Add.getNode());
6165 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6166 }
6167
6168 // Try more general transforms if the predicates match and the only user of
6169 // the compares is the 'and' or 'or'.
6170 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6171 N0.hasOneUse() && N1.hasOneUse()) {
6172 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6173 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6174 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6175 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6176 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6177 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6178 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6179 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6180 }
6181
6182 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6183 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6184 // Match a shared variable operand and 2 non-opaque constant operands.
6185 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6186 // The difference of the constants must be a single bit.
6187 const APInt &CMax =
6188 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6189 const APInt &CMin =
6190 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6191 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6192 };
6193 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6194 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6195 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6196 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6197 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6198 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6199 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6200 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6201 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6202 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6203 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6204 }
6205 }
6206 }
6207
6208 // Canonicalize equivalent operands to LL == RL.
6209 if (LL == RR && LR == RL) {
6210 CC1 = ISD::getSetCCSwappedOperands(CC1);
6211 std::swap(RL, RR);
6212 }
6213
6214 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6215 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6216 if (LL == RL && LR == RR) {
6217 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6218 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6219 if (NewCC != ISD::SETCC_INVALID &&
6220 (!LegalOperations ||
6221 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6222 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6223 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6224 }
6225
6226 return SDValue();
6227}
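
// Illustrative sketch, not part of this file: two of the setcc merges above,
// restated as plain boolean identities on 32-bit integers. Names are
// hypothetical.
static bool bothZeroSketch(unsigned X, unsigned Y) {
  // (x == 0) && (y == 0)  ==  ((x | y) == 0)
  return (X | Y) == 0;
}
static bool bothEqualSketch(unsigned A, unsigned B, unsigned C, unsigned D) {
  // (a == b) && (c == d)  ==  (((a ^ b) | (c ^ d)) == 0)
  return ((A ^ B) | (C ^ D)) == 0;
}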
6228
6229static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6230 SelectionDAG &DAG) {
6231 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6232}
6233
6234static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6235 SelectionDAG &DAG) {
6236 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6237}
6238
6239// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6240static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6241 ISD::CondCode CC, unsigned OrAndOpcode,
6242 SelectionDAG &DAG,
6243 bool isFMAXNUMFMINNUM_IEEE,
6244 bool isFMAXNUMFMINNUM) {
6245 // The optimization cannot be applied for all the predicates because
6246 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6247 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6248 // applied at all if one of the operands is a signaling NaN.
6249
6250 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6251 // are non NaN values.
6252 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6253 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6254 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6255 isFMAXNUMFMINNUM_IEEE
6256 ? ISD::FMINNUM_IEEE
6257 : ISD::DELETED_NODE;
6258 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6259 (OrAndOpcode == ISD::OR)) ||
6260 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6261 (OrAndOpcode == ISD::AND)))
6262 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6263 isFMAXNUMFMINNUM_IEEE
6264 ? ISD::FMAXNUM_IEEE
6265 : ISD::DELETED_NODE;
6266 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6267 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6268 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6269 // that there are not any sNaNs, then the optimization is not valid
6270 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6271 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6272 // we can prove that we do not have any sNaNs, then we can do the
6273 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6274 // cases.
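// For example, with quiet NaNs only: if Operand1 is a qNaN then
// (Operand1 olt CommonValue) is false and the OR reduces to
// (Operand2 olt CommonValue); FMINNUM(qNaN, Operand2) returns Operand2, so
// setcc(FMINNUM(Operand1, Operand2), CommonValue, olt) computes the same value.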
6275 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6276 (OrAndOpcode == ISD::OR)) ||
6277 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6278 (OrAndOpcode == ISD::AND)))
6279 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6280 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6281 isFMAXNUMFMINNUM_IEEE
6282 ? ISD::FMINNUM_IEEE
6283 : ISD::DELETED_NODE;
6284 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6285 (OrAndOpcode == ISD::OR)) ||
6286 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6287 (OrAndOpcode == ISD::AND)))
6288 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6289 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6290 isFMAXNUMFMINNUM_IEEE
6291 ? ISD::FMAXNUM_IEEE
6292 : ISD::DELETED_NODE;
6293 return ISD::DELETED_NODE;
6294}
6295
6296 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6297 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6298 assert(
6299 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6300 "Invalid Op to combine SETCC with");
6301
6302 // TODO: Search past casts/truncates.
6303 SDValue LHS = LogicOp->getOperand(0);
6304 SDValue RHS = LogicOp->getOperand(1);
6305 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6306 !LHS->hasOneUse() || !RHS->hasOneUse())
6307 return SDValue();
6308
6309 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6310 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6311 LogicOp, LHS.getNode(), RHS.getNode());
6312
6313 SDValue LHS0 = LHS->getOperand(0);
6314 SDValue RHS0 = RHS->getOperand(0);
6315 SDValue LHS1 = LHS->getOperand(1);
6316 SDValue RHS1 = RHS->getOperand(1);
6317 // TODO: We don't actually need a splat here, for vectors we just need the
6318 // invariants to hold for each element.
6319 auto *LHS1C = isConstOrConstSplat(LHS1);
6320 auto *RHS1C = isConstOrConstSplat(RHS1);
6321 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6322 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6323 EVT VT = LogicOp->getValueType(0);
6324 EVT OpVT = LHS0.getValueType();
6325 SDLoc DL(LogicOp);
6326
6327 // Check if the operands of an and/or operation are comparisons and if they
6328 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6329 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6330 // sequence will be replaced with min-cmp sequence:
6331 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6332 // and and-cmp-cmp will be replaced with max-cmp sequence:
6333 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6334 // The optimization does not work for `==` or `!=`.
6335 // The two comparisons should have either the same predicate or the
6336 // predicate of one of the comparisons is the opposite of the other one.
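// For example, with swapped predicates and a shared value b:
//   (a < b) | (b > c)  is  (a < b) | (c < b)  -->  smin(a, c) < b
//   (a < b) & (b > c)  is  (a < b) & (c < b)  -->  smax(a, c) < b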
6337 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6338 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6339 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6340 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6341 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6342 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6343 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6344 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6345 (OpVT.isFloatingPoint() &&
6346 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6348 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6349 CCL != ISD::SETTRUE &&
6350 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6351
6352 SDValue CommonValue, Operand1, Operand2;
6353 ISD::CondCode CC = ISD::SETCC_INVALID;
6354 if (CCL == CCR) {
6355 if (LHS0 == RHS0) {
6356 CommonValue = LHS0;
6357 Operand1 = LHS1;
6358 Operand2 = RHS1;
6359 CC = CCL;
6360 } else if (LHS1 == RHS1) {
6361 CommonValue = LHS1;
6362 Operand1 = LHS0;
6363 Operand2 = RHS0;
6364 CC = CCL;
6365 }
6366 } else {
6367 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6368 if (LHS0 == RHS1) {
6369 CommonValue = LHS0;
6370 Operand1 = LHS1;
6371 Operand2 = RHS0;
6372 CC = CCR;
6373 } else if (RHS0 == LHS1) {
6374 CommonValue = LHS1;
6375 Operand1 = LHS0;
6376 Operand2 = RHS1;
6377 CC = CCL;
6378 }
6379 }
6380
6381 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6382 // handle it using OR/AND.
6383 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6384 CC = ISD::SETCC_INVALID;
6385 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6386 CC = ISD::SETCC_INVALID;
6387 
6388 if (CC != ISD::SETCC_INVALID) {
6389 unsigned NewOpcode = ISD::DELETED_NODE;
6390 bool IsSigned = isSignedIntSetCC(CC);
6391 if (OpVT.isInteger()) {
6392 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6393 CC == ISD::SETLT || CC == ISD::SETULT);
6394 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6395 if (IsLess == IsOr)
6396 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6397 else
6398 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6399 } else if (OpVT.isFloatingPoint())
6400 NewOpcode =
6401 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6402 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6403
6404 if (NewOpcode != ISD::DELETED_NODE) {
6405 SDValue MinMaxValue =
6406 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6407 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6408 }
6409 }
6410 }
6411
6412 if (TargetPreference == AndOrSETCCFoldKind::None)
6413 return SDValue();
6414
6415 if (CCL == CCR &&
6416 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6417 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6418 const APInt &APLhs = LHS1C->getAPIntValue();
6419 const APInt &APRhs = RHS1C->getAPIntValue();
6420
6421 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6422 // case this is just a compare).
6423 if (APLhs == (-APRhs) &&
6424 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6425 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6426 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6427 // (icmp eq A, C) | (icmp eq A, -C)
6428 // -> (icmp eq Abs(A), C)
6429 // (icmp ne A, C) & (icmp ne A, -C)
6430 // -> (icmp ne Abs(A), C)
6431 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6432 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6433 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6434 } else if (TargetPreference &
6435 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6436 
6437 // AndOrSETCCFoldKind::AddAnd:
6438 // A == C0 | A == C1
6439 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6440 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6441 // A != C0 & A != C1
6442 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6443 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6444
6445 // AndOrSETCCFoldKind::NotAnd:
6446 // A == C0 | A == C1
6447 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6448 // -> ~A & smin(C0, C1) == 0
6449 // A != C0 & A != C1
6450 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6451 // -> ~A & smin(C0, C1) != 0
6452
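// A worked NotAnd example with illustrative i8 constants: for A == -1 | A == -5,
// smax is -1 and smax - smin = 4 is a power of 2, so this becomes
//   (~A & -5) == 0
// which holds exactly when ~A is a subset of ~(-5) = 4, i.e. A is -1 or -5.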
6453 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6454 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6455 APInt Dif = MaxC - MinC;
6456 if (!Dif.isZero() && Dif.isPowerOf2()) {
6457 if (MaxC.isAllOnes() &&
6458 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6459 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6460 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6461 DAG.getConstant(MinC, DL, OpVT));
6462 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6463 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6464 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6465
6466 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6467 DAG.getConstant(-MinC, DL, OpVT));
6468 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6469 DAG.getConstant(~Dif, DL, OpVT));
6470 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6471 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6472 }
6473 }
6474 }
6475 }
6476
6477 return SDValue();
6478}
6479
6480// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6481// We canonicalize to the `select` form in the middle end, but the `and` form
6482 // gets better codegen on all tested targets (arm, x86, riscv).
6483 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6484 const SDLoc &DL, SelectionDAG &DAG) {
6485 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6486 if (!isNullConstant(F))
6487 return SDValue();
6488
6489 EVT CondVT = Cond.getValueType();
6490 if (TLI.getBooleanContents(CondVT) !=
6491 TargetLowering::ZeroOrOneBooleanContent)
6492 return SDValue();
6493
6494 if (T.getOpcode() != ISD::AND)
6495 return SDValue();
6496
6497 if (!isOneConstant(T.getOperand(1)))
6498 return SDValue();
6499
6500 EVT OpVT = T.getValueType();
6501
6502 SDValue CondMask =
6503 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6504 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6505}
6506
6507/// This contains all DAGCombine rules which reduce two values combined by
6508/// an And operation to a single value. This makes them reusable in the context
6509/// of visitSELECT(). Rules involving constants are not included as
6510/// visitSELECT() already handles those cases.
6511SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6512 EVT VT = N1.getValueType();
6513 SDLoc DL(N);
6514
6515 // fold (and x, undef) -> 0
6516 if (N0.isUndef() || N1.isUndef())
6517 return DAG.getConstant(0, DL, VT);
6518
6519 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6520 return V;
6521
6522 // Canonicalize:
6523 // and(x, add) -> and(add, x)
6524 if (N1.getOpcode() == ISD::ADD)
6525 std::swap(N0, N1);
6526
6527 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6528 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6529 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6530 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6531 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6532 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6533 // immediate for an add, but it is legal if its top c2 bits are set,
6534 // transform the ADD so the immediate doesn't need to be materialized
6535 // in a register.
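// For example (illustrative i64 constants): in (x + 0x7FFFFFFFF) & (y >> 29),
// only the low 35 bits of the add survive the mask, so the constant's upper
// 29 bits may be set freely; 0x7FFFFFFFF with the high 29 bits set is -1, and
// adding -1 is typically encodable as an immediate while 0x7FFFFFFFF is not.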
6536 APInt ADDC = ADDI->getAPIntValue();
6537 APInt SRLC = SRLI->getAPIntValue();
6538 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6539 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6540 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6541 SRLC.getZExtValue());
6542 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6543 ADDC |= Mask;
6544 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6545 SDLoc DL0(N0);
6546 SDValue NewAdd =
6547 DAG.getNode(ISD::ADD, DL0, VT,
6548 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6549 CombineTo(N0.getNode(), NewAdd);
6550 // Return N so it doesn't get rechecked!
6551 return SDValue(N, 0);
6552 }
6553 }
6554 }
6555 }
6556 }
6557 }
6558
6559 return SDValue();
6560}
6561
6562bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6563 EVT LoadResultTy, EVT &ExtVT) {
6564 if (!AndC->getAPIntValue().isMask())
6565 return false;
6566
6567 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6568
6569 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6570 EVT LoadedVT = LoadN->getMemoryVT();
6571
6572 if (ExtVT == LoadedVT &&
6573 (!LegalOperations ||
6574 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6575 // ZEXTLOAD will match without needing to change the size of the value being
6576 // loaded.
6577 return true;
6578 }
6579
6580 // Do not change the width of a volatile or atomic load.
6581 if (!LoadN->isSimple())
6582 return false;
6583
6584 // Do not generate loads of non-round integer types since these can
6585 // be expensive (and would be wrong if the type is not byte sized).
6586 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6587 return false;
6588
6589 if (LegalOperations &&
6590 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6591 return false;
6592
6593 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6594 return false;
6595
6596 return true;
6597}
6598
6599bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6600 ISD::LoadExtType ExtType, EVT &MemVT,
6601 unsigned ShAmt) {
6602 if (!LDST)
6603 return false;
6604 // Only allow byte offsets.
6605 if (ShAmt % 8)
6606 return false;
6607
6608 // Do not generate loads of non-round integer types since these can
6609 // be expensive (and would be wrong if the type is not byte sized).
6610 if (!MemVT.isRound())
6611 return false;
6612
6613 // Don't change the width of a volatile or atomic load.
6614 if (!LDST->isSimple())
6615 return false;
6616
6617 EVT LdStMemVT = LDST->getMemoryVT();
6618
6619 // Bail out when changing the scalable property, since we can't be sure that
6620 // we're actually narrowing here.
6621 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6622 return false;
6623
6624 // Verify that we are actually reducing a load width here.
6625 if (LdStMemVT.bitsLT(MemVT))
6626 return false;
6627
6628 // Ensure that this isn't going to produce an unsupported memory access.
6629 if (ShAmt) {
6630 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6631 const unsigned ByteShAmt = ShAmt / 8;
6632 const Align LDSTAlign = LDST->getAlign();
6633 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6634 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6635 LDST->getAddressSpace(), NarrowAlign,
6636 LDST->getMemOperand()->getFlags()))
6637 return false;
6638 }
6639
6640 // It's not possible to generate a constant of extended or untyped type.
6641 EVT PtrType = LDST->getBasePtr().getValueType();
6642 if (PtrType == MVT::Untyped || PtrType.isExtended())
6643 return false;
6644
6645 if (isa<LoadSDNode>(LDST)) {
6646 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6647 // Don't transform one with multiple uses, this would require adding a new
6648 // load.
6649 if (!SDValue(Load, 0).hasOneUse())
6650 return false;
6651
6652 if (LegalOperations &&
6653 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6654 return false;
6655
6656 // For the transform to be legal, the load must produce only two values
6657 // (the value loaded and the chain). Don't transform a pre-increment
6658 // load, for example, which produces an extra value. Otherwise the
6659 // transformation is not equivalent, and the downstream logic to replace
6660 // uses gets things wrong.
6661 if (Load->getNumValues() > 2)
6662 return false;
6663
6664 // If the load that we're shrinking is an extload and we're not just
6665 // discarding the extension we can't simply shrink the load. Bail.
6666 // TODO: It would be possible to merge the extensions in some cases.
6667 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6668 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6669 return false;
6670
6671 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6672 return false;
6673 } else {
6674 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6675 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6676 // Can't write outside the original store
6677 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6678 return false;
6679
6680 if (LegalOperations &&
6681 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6682 return false;
6683 }
6684 return true;
6685}
6686
6687bool DAGCombiner::SearchForAndLoads(SDNode *N,
6688 SmallVectorImpl<LoadSDNode*> &Loads,
6689 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6690 ConstantSDNode *Mask,
6691 SDNode *&NodeToMask) {
6692 // Recursively search for the operands, looking for loads which can be
6693 // narrowed.
6694 for (SDValue Op : N->op_values()) {
6695 if (Op.getValueType().isVector())
6696 return false;
6697
6698 // Some constants may need fixing up later if they are too large.
6699 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6700 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6701 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6702 NodesWithConsts.insert(N);
6703 continue;
6704 }
6705
6706 if (!Op.hasOneUse())
6707 return false;
6708
6709 switch(Op.getOpcode()) {
6710 case ISD::LOAD: {
6711 auto *Load = cast<LoadSDNode>(Op);
6712 EVT ExtVT;
6713 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6714 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6715
6716 // ZEXTLOAD is already small enough.
6717 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6718 ExtVT.bitsGE(Load->getMemoryVT()))
6719 continue;
6720
6721 // Use LE to convert equal sized loads to zext.
6722 if (ExtVT.bitsLE(Load->getMemoryVT()))
6723 Loads.push_back(Load);
6724
6725 continue;
6726 }
6727 return false;
6728 }
6729 case ISD::ZERO_EXTEND:
6730 case ISD::AssertZext: {
6731 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6732 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6733 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6734 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6735 Op.getOperand(0).getValueType();
6736
6737 // We can accept extending nodes if the mask is wider than, or equal in
6738 // width to, the original type.
6739 if (ExtVT.bitsGE(VT))
6740 continue;
6741 break;
6742 }
6743 case ISD::OR:
6744 case ISD::XOR:
6745 case ISD::AND:
6746 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6747 NodeToMask))
6748 return false;
6749 continue;
6750 }
6751
6752 // Allow one node which will be masked along with any loads found.
6753 if (NodeToMask)
6754 return false;
6755
6756 // Also ensure that the node to be masked only produces one data result.
6757 NodeToMask = Op.getNode();
6758 if (NodeToMask->getNumValues() > 1) {
6759 bool HasValue = false;
6760 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6761 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6762 if (VT != MVT::Glue && VT != MVT::Other) {
6763 if (HasValue) {
6764 NodeToMask = nullptr;
6765 return false;
6766 }
6767 HasValue = true;
6768 }
6769 }
6770 assert(HasValue && "Node to be masked has no data result?");
6771 }
6772 }
6773 return true;
6774}
6775
6776bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6777 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6778 if (!Mask)
6779 return false;
6780
6781 if (!Mask->getAPIntValue().isMask())
6782 return false;
6783
6784 // No need to do anything if the and directly uses a load.
6785 if (isa<LoadSDNode>(N->getOperand(0)))
6786 return false;
6787
6788 SmallVector<LoadSDNode*, 8> Loads;
6789 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6790 SDNode *FixupNode = nullptr;
6791 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6792 if (Loads.empty())
6793 return false;
6794
6795 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6796 SDValue MaskOp = N->getOperand(1);
6797
6798 // If it exists, fixup the single node we allow in the tree that needs
6799 // masking.
6800 if (FixupNode) {
6801 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6802 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6803 FixupNode->getValueType(0),
6804 SDValue(FixupNode, 0), MaskOp);
6805 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6806 if (And.getOpcode() == ISD::AND)
6807 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6808 }
6809
6810 // Narrow any constants that need it.
6811 for (auto *LogicN : NodesWithConsts) {
6812 SDValue Op0 = LogicN->getOperand(0);
6813 SDValue Op1 = LogicN->getOperand(1);
6814
6815 if (isa<ConstantSDNode>(Op0))
6816 Op0 =
6817 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6818
6819 if (isa<ConstantSDNode>(Op1))
6820 Op1 =
6821 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6822
6823 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6824 std::swap(Op0, Op1);
6825
6826 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6827 }
6828
6829 // Create narrow loads.
6830 for (auto *Load : Loads) {
6831 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6832 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6833 SDValue(Load, 0), MaskOp);
6834 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6835 if (And.getOpcode() == ISD::AND)
6836 And = SDValue(
6837 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6838 SDValue NewLoad = reduceLoadWidth(And.getNode());
6839 assert(NewLoad &&
6840 "Shouldn't be masking the load if it can't be narrowed");
6841 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6842 }
6843 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6844 return true;
6845 }
6846 return false;
6847}
6848
6849// Unfold
6850// x & (-1 'logical shift' y)
6851// To
6852// (x 'opposite logical shift' y) 'logical shift' y
6853// if it is better for performance.
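// For example: x & (-1 << y) clears the low y bits of x, and so does
// (x >> y) << y; likewise x & (-1 >> y) clears the high y bits, as does
// (x << y) >> y (all shifts logical). The mask materialization can therefore
// be traded for a second shift when the target prefers that.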
6854SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6855 assert(N->getOpcode() == ISD::AND);
6856
6857 SDValue N0 = N->getOperand(0);
6858 SDValue N1 = N->getOperand(1);
6859
6860 // Do we actually prefer shifts over mask?
6861 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6862 return SDValue();
6863
6864 // Try to match (-1 '[outer] logical shift' y)
6865 unsigned OuterShift;
6866 unsigned InnerShift; // The opposite direction to the OuterShift.
6867 SDValue Y; // Shift amount.
6868 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6869 if (!M.hasOneUse())
6870 return false;
6871 OuterShift = M->getOpcode();
6872 if (OuterShift == ISD::SHL)
6873 InnerShift = ISD::SRL;
6874 else if (OuterShift == ISD::SRL)
6875 InnerShift = ISD::SHL;
6876 else
6877 return false;
6878 if (!isAllOnesConstant(M->getOperand(0)))
6879 return false;
6880 Y = M->getOperand(1);
6881 return true;
6882 };
6883
6884 SDValue X;
6885 if (matchMask(N1))
6886 X = N0;
6887 else if (matchMask(N0))
6888 X = N1;
6889 else
6890 return SDValue();
6891
6892 SDLoc DL(N);
6893 EVT VT = N->getValueType(0);
6894
6895 // tmp = x 'opposite logical shift' y
6896 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6897 // ret = tmp 'logical shift' y
6898 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6899
6900 return T1;
6901}
6902
6903/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6904/// For a target with a bit test, this is expected to become test + set and save
6905/// at least 1 instruction.
6906 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6907 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6908
6909 // Look through an optional extension.
6910 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6911 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6912 And0 = And0.getOperand(0);
6913 if (!isOneConstant(And1) || !And0.hasOneUse())
6914 return SDValue();
6915
6916 SDValue Src = And0;
6917
6918 // Attempt to find a 'not' op.
6919 // TODO: Should we favor test+set even without the 'not' op?
6920 bool FoundNot = false;
6921 if (isBitwiseNot(Src)) {
6922 FoundNot = true;
6923 Src = Src.getOperand(0);
6924
6925 // Look through an optional truncation. The source operand may not be the
6926 // same type as the original 'and', but that is ok because we are masking
6927 // off everything but the low bit.
6928 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6929 Src = Src.getOperand(0);
6930 }
6931
6932 // Match a shift-right by constant.
6933 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6934 return SDValue();
6935
6936 // This is probably not worthwhile without a supported type.
6937 EVT SrcVT = Src.getValueType();
6938 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6939 if (!TLI.isTypeLegal(SrcVT))
6940 return SDValue();
6941
6942 // We might have looked through casts that make this transform invalid.
6943 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6944 SDValue ShiftAmt = Src.getOperand(1);
6945 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6946 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6947 return SDValue();
6948
6949 // Set source to shift source.
6950 Src = Src.getOperand(0);
6951
6952 // Try again to find a 'not' op.
6953 // TODO: Should we favor test+set even with two 'not' ops?
6954 if (!FoundNot) {
6955 if (!isBitwiseNot(Src))
6956 return SDValue();
6957 Src = Src.getOperand(0);
6958 }
6959
6960 if (!TLI.hasBitTest(Src, ShiftAmt))
6961 return SDValue();
6962
6963 // Turn this into a bit-test pattern using mask op + setcc:
6964 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6965 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6966 SDLoc DL(And);
6967 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6968 EVT CCVT =
6969 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6970 SDValue Mask = DAG.getConstant(
6971 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6972 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6973 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6974 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6975 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6976}
6977
6978/// For targets that support usubsat, match a bit-hack form of that operation
6979/// that ends in 'and' and convert it.
6980 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6981 EVT VT = N->getValueType(0);
6982 unsigned BitWidth = VT.getScalarSizeInBits();
6983 APInt SignMask = APInt::getSignMask(BitWidth);
6984
6985 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6986 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6987 // xor/add with SMIN (signmask) are logically equivalent.
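// For example, for i8: if X >= 128 the arithmetic shift yields all-ones and
// X ^ 128 == X - 128, so the result is X - 128; if X < 128 the shift yields 0
// and the result is 0. That is exactly usubsat(X, 128).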
6988 SDValue X;
6989 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6990 m_OneUse(m_Sra(m_Deferred(X),
6991 m_SpecificInt(BitWidth - 1))))) &&
6992 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6993 m_OneUse(m_Sra(m_Deferred(X),
6994 m_SpecificInt(BitWidth - 1))))))
6995 return SDValue();
6996
6997 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6998 DAG.getConstant(SignMask, DL, VT));
6999}
7000
7001/// Given a bitwise logic operation N with a matching bitwise logic operand,
7002/// fold a pattern where 2 of the source operands are identically shifted
7003/// values. For example:
7004/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7005 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7006 SelectionDAG &DAG) {
7007 unsigned LogicOpcode = N->getOpcode();
7008 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7009 "Expected bitwise logic operation");
7010
7011 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7012 return SDValue();
7013
7014 // Match another bitwise logic op and a shift.
7015 unsigned ShiftOpcode = ShiftOp.getOpcode();
7016 if (LogicOp.getOpcode() != LogicOpcode ||
7017 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7018 ShiftOpcode == ISD::SRA))
7019 return SDValue();
7020
7021 // Match another shift op inside the first logic operand. Handle both commuted
7022 // possibilities.
7023 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7024 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7025 SDValue X1 = ShiftOp.getOperand(0);
7026 SDValue Y = ShiftOp.getOperand(1);
7027 SDValue X0, Z;
7028 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7029 LogicOp.getOperand(0).getOperand(1) == Y) {
7030 X0 = LogicOp.getOperand(0).getOperand(0);
7031 Z = LogicOp.getOperand(1);
7032 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7033 LogicOp.getOperand(1).getOperand(1) == Y) {
7034 X0 = LogicOp.getOperand(1).getOperand(0);
7035 Z = LogicOp.getOperand(0);
7036 } else {
7037 return SDValue();
7038 }
7039
7040 EVT VT = N->getValueType(0);
7041 SDLoc DL(N);
7042 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7043 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7044 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7045}
7046
7047/// Given a tree of logic operations with shape like
7048/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7049/// try to match and fold shift operations with the same shift amount.
7050/// For example:
7051/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7052/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7053 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7054 SDValue RightHand, SelectionDAG &DAG) {
7055 unsigned LogicOpcode = N->getOpcode();
7056 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7057 "Expected bitwise logic operation");
7058 if (LeftHand.getOpcode() != LogicOpcode ||
7059 RightHand.getOpcode() != LogicOpcode)
7060 return SDValue();
7061 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7062 return SDValue();
7063
7064 // Try to match one of following patterns:
7065 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7066 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7067 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7068 // itself.
7069 SDValue CombinedShifts, W;
7070 SDValue R0 = RightHand.getOperand(0);
7071 SDValue R1 = RightHand.getOperand(1);
7072 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7073 W = R1;
7074 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7075 W = R0;
7076 else
7077 return SDValue();
7078
7079 EVT VT = N->getValueType(0);
7080 SDLoc DL(N);
7081 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7082}
7083
7084SDValue DAGCombiner::visitAND(SDNode *N) {
7085 SDValue N0 = N->getOperand(0);
7086 SDValue N1 = N->getOperand(1);
7087 EVT VT = N1.getValueType();
7088 SDLoc DL(N);
7089
7090 // x & x --> x
7091 if (N0 == N1)
7092 return N0;
7093
7094 // fold (and c1, c2) -> c1&c2
7095 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7096 return C;
7097
7098 // canonicalize constant to RHS
7099 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7100 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7101 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7102
7103 if (areBitwiseNotOfEachother(N0, N1))
7104 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7105
7106 // fold vector ops
7107 if (VT.isVector()) {
7108 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7109 return FoldedVOp;
7110
7111 // fold (and x, 0) -> 0, vector edition
7112 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7113 // do not return N1, because undef node may exist in N1
7114 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7115 N1.getValueType());
7116
7117 // fold (and x, -1) -> x, vector edition
7118 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7119 return N0;
7120
7121 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7122 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7123 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7124 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7125 EVT LoadVT = MLoad->getMemoryVT();
7126 EVT ExtVT = VT;
7127 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7128 // For this AND to be a zero extension of the masked load the elements
7129 // of the BuildVec must mask the bottom bits of the extended element
7130 // type
7131 uint64_t ElementSize =
7132 LoadVT.getVectorElementType().getScalarSizeInBits();
7133 if (Splat->getAPIntValue().isMask(ElementSize)) {
7134 SDValue NewLoad = DAG.getMaskedLoad(
7135 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7136 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7138 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7139 bool LoadHasOtherUsers = !N0.hasOneUse();
7140 CombineTo(N, NewLoad);
7141 if (LoadHasOtherUsers)
7142 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7143 return SDValue(N, 0);
7144 }
7145 }
7146 }
7147 }
7148
7149 // fold (and x, -1) -> x
7150 if (isAllOnesConstant(N1))
7151 return N0;
7152
7153 // if (and x, c) is known to be zero, return 0
7154 unsigned BitWidth = VT.getScalarSizeInBits();
7155 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7156 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7157 return DAG.getConstant(0, DL, VT);
7158
7159 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7160 return R;
7161
7162 if (SDValue NewSel = foldBinOpIntoSelect(N))
7163 return NewSel;
7164
7165 // reassociate and
7166 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7167 return RAND;
7168
7169 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7170 if (SDValue SD =
7171 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7172 return SD;
7173
7174 // fold (and (or x, C), D) -> D if (C & D) == D
7175 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7176 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7177 };
7178 if (N0.getOpcode() == ISD::OR &&
7179 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7180 return N1;
7181
7182 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7183 SDValue N0Op0 = N0.getOperand(0);
7184 EVT SrcVT = N0Op0.getValueType();
7185 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7186 APInt Mask = ~N1C->getAPIntValue();
7187 Mask = Mask.trunc(SrcBitWidth);
7188
7189 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7190 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7191 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7192
7193 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7194 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7195 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7196 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7197 TLI.isNarrowingProfitable(N, VT, SrcVT))
7198 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7199 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7200 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7201 }
7202
7203 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7204 if (ISD::isExtOpcode(N0.getOpcode())) {
7205 unsigned ExtOpc = N0.getOpcode();
7206 SDValue N0Op0 = N0.getOperand(0);
7207 if (N0Op0.getOpcode() == ISD::AND &&
7208 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7209 N0->hasOneUse() && N0Op0->hasOneUse()) {
7210 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7211 {N0Op0.getOperand(1)})) {
7212 if (SDValue NewMask =
7213 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7214 return DAG.getNode(ISD::AND, DL, VT,
7215 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7216 NewMask);
7217 }
7218 }
7219 }
7220 }
7221
7222 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7223 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7224 // already be zero by virtue of the width of the base type of the load.
7225 //
7226 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7227 // more cases.
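// For example, (and (zextload i8 -> i32 p), 255): the mask resized to the
// 8-bit memory type is all-ones, so the 'and' is redundant and can be folded
// away; an extload is first converted to a zextload so the semantics hold.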
7228 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7230 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7231 N0.getOperand(0).getResNo() == 0) ||
7232 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7233 auto *Load =
7234 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7235
7236 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7237 // This can be a pure constant or a vector splat, in which case we treat the
7238 // vector as a scalar and use the splat value.
7239 APInt Constant = APInt::getZero(1);
7240 if (const ConstantSDNode *C = isConstOrConstSplat(
7241 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7242 Constant = C->getAPIntValue();
7243 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7244 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7245 APInt SplatValue, SplatUndef;
7246 unsigned SplatBitSize;
7247 bool HasAnyUndefs;
7248 // Endianness should not matter here. Code below makes sure that we only
7249 // use the result if the SplatBitSize is a multiple of the vector element
7250 // size. And after that we AND all element sized parts of the splat
7251 // together. So the end result should be the same regardless of in which
7252 // order we do those operations.
7253 const bool IsBigEndian = false;
7254 bool IsSplat =
7255 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7256 HasAnyUndefs, EltBitWidth, IsBigEndian);
7257
7258 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7259 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7260 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7261 // Undef bits can contribute to a possible optimisation if set, so
7262 // set them.
7263 SplatValue |= SplatUndef;
7264
7265 // The splat value may be something like "0x00FFFFFF", which means 0 for
7266 // the first vector value and FF for the rest, repeating. We need a mask
7267 // that will apply equally to all members of the vector, so AND all the
7268 // lanes of the constant together.
7269 Constant = APInt::getAllOnes(EltBitWidth);
7270 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7271 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7272 }
7273 }
7274
7275 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7276 // actually legal and isn't going to get expanded, else this is a false
7277 // optimisation.
7278 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7279 Load->getValueType(0),
7280 Load->getMemoryVT());
7281
7282 // Resize the constant to the same size as the original memory access before
7283 // extension. If it is still the AllOnesValue then this AND is completely
7284 // unneeded.
7285 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7286
7287 bool B;
7288 switch (Load->getExtensionType()) {
7289 default: B = false; break;
7290 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7291 case ISD::ZEXTLOAD:
7292 case ISD::NON_EXTLOAD: B = true; break;
7293 }
7294
7295 if (B && Constant.isAllOnes()) {
7296 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7297 // preserve semantics once we get rid of the AND.
7298 SDValue NewLoad(Load, 0);
7299
7300 // Fold the AND away. NewLoad may get replaced immediately.
7301 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7302
7303 if (Load->getExtensionType() == ISD::EXTLOAD) {
7304 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7305 Load->getValueType(0), SDLoc(Load),
7306 Load->getChain(), Load->getBasePtr(),
7307 Load->getOffset(), Load->getMemoryVT(),
7308 Load->getMemOperand());
7309 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7310 if (Load->getNumValues() == 3) {
7311 // PRE/POST_INC loads have 3 values.
7312 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7313 NewLoad.getValue(2) };
7314 CombineTo(Load, To, 3, true);
7315 } else {
7316 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7317 }
7318 }
7319
7320 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7321 }
7322 }
7323
7324 // Try to convert a constant mask AND into a shuffle clear mask.
7325 if (VT.isVector())
7326 if (SDValue Shuffle = XformToShuffleWithZero(N))
7327 return Shuffle;
7328
7329 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7330 return Combined;
7331
7332 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7333 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7334 SDValue Ext = N0.getOperand(0);
7335 EVT ExtVT = Ext->getValueType(0);
7336 SDValue Extendee = Ext->getOperand(0);
7337
7338 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7339 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7340 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7341 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7342 // => (extract_subvector (iN_zeroext v))
7343 SDValue ZeroExtExtendee =
7344 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7345
7346 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7347 N0.getOperand(1));
7348 }
7349 }
7350
7351 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7352 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7353 EVT MemVT = GN0->getMemoryVT();
7354 EVT ScalarVT = MemVT.getScalarType();
7355
7356 if (SDValue(GN0, 0).hasOneUse() &&
7357 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7358 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7359 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7360 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7361
7362 SDValue ZExtLoad = DAG.getMaskedGather(
7363 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7364 GN0->getIndexType(), ISD::ZEXTLOAD);
7365
7366 CombineTo(N, ZExtLoad);
7367 AddToWorklist(ZExtLoad.getNode());
7368 // Avoid recheck of N.
7369 return SDValue(N, 0);
7370 }
7371 }
7372
7373 // fold (and (load x), 255) -> (zextload x, i8)
7374 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7375 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7376 if (SDValue Res = reduceLoadWidth(N))
7377 return Res;
7378
7379 if (LegalTypes) {
7380 // Attempt to propagate the AND back up to the leaves which, if they're
7381 // loads, can be combined to narrow loads and the AND node can be removed.
7382 // Perform after legalization so that extend nodes will already be
7383 // combined into the loads.
7384 if (BackwardsPropagateMask(N))
7385 return SDValue(N, 0);
7386 }
7387
7388 if (SDValue Combined = visitANDLike(N0, N1, N))
7389 return Combined;
7390
7391 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7392 if (N0.getOpcode() == N1.getOpcode())
7393 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7394 return V;
7395
7396 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7397 return R;
7398 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7399 return R;
7400
7401 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7402 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7403 SDValue X, Y, Z, NotY;
7404 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7405 if (sd_match(N,
7406 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7407 sd_match(NotY, m_Not(m_Value(Y))) &&
7408 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7409 return DAG.getNode(ISD::AND, DL, VT, X,
7410 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7411
7412 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7413 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7414 if (sd_match(N, m_And(m_Value(X),
7415 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7416 sd_match(NotY, m_Not(m_Value(Y))) &&
7417 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7418 return DAG.getNode(ISD::AND, DL, VT, X,
7419 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7420
7421 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7422 // If we are shifting down an extended sign bit, see if we can simplify
7423 // this to shifting the MSB directly to expose further simplifications.
7424 // This pattern often appears after sext_inreg legalization.
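// For example, if X is sign-extended in-register from i8 inside an i32, it
// has 25 sign bits, so (and (srl X, C), 1) for any C in [7, 30] reads a copy
// of the sign bit and can be rewritten as (srl X, 31).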
7425 APInt Amt;
7426 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7427 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7428 return DAG.getNode(ISD::SRL, DL, VT, X,
7429 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7430
7431 // Masking the negated extension of a boolean is just the zero-extended
7432 // boolean:
7433 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7434 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7435 //
7436 // Note: the SimplifyDemandedBits fold below can make an information-losing
7437 // transform, and then we have no way to find this better fold.
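// For example, with bool X == 1: zext gives 1, (sub 0, 1) is all-ones, and
// masking with 1 yields 1 == zext(X); sext gives -1, (sub 0, -1) is 1, and
// masking with 1 again yields 1. With X == 0 both sides are 0.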
7438 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7439 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7440 X.getOperand(0).getScalarValueSizeInBits() == 1)
7441 return X;
7442 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7443 X.getOperand(0).getScalarValueSizeInBits() == 1)
7444 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7445 }
7446
7447 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7448 // fold (and (sra)) -> (and (srl)) when possible.
7449 if (SimplifyDemandedBits(SDValue(N, 0)))
7450 return SDValue(N, 0);
7451
7452 // fold (zext_inreg (extload x)) -> (zextload x)
7453 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7454 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7455 (ISD::isEXTLoad(N0.getNode()) ||
7456 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7457 auto *LN0 = cast<LoadSDNode>(N0);
7458 EVT MemVT = LN0->getMemoryVT();
7459 // If we zero all the possible extended bits, then we can turn this into
7460 // a zextload if we are running before legalize or the operation is legal.
7461 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7462 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7463 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7464 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7465 ((!LegalOperations && LN0->isSimple()) ||
7466 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7467 SDValue ExtLoad =
7468 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7469 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7470 AddToWorklist(N);
7471 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7472 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7473 }
7474 }
7475
7476 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7477 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7478 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7479 N0.getOperand(1), false))
7480 return BSwap;
7481 }
7482
7483 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7484 return Shifts;
7485
7486 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7487 return V;
7488
7489 // Recognize the following pattern:
7490 //
7491 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7492 //
7493 // where bitmask is a mask that clears the upper bits of AndVT. The
7494 // number of bits in bitmask must be a power of two.
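// For example, (and (sign_extend i8 x to i32), 0xFF): the mask keeps exactly
// the 8 bits that came from x, so the result equals (zero_extend i8 x to i32).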
7495 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7496 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7497 return false;
7498
7499 auto *C = dyn_cast<ConstantSDNode>(RHS);
7500 if (!C)
7501 return false;
7502
7503 if (!C->getAPIntValue().isMask(
7504 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7505 return false;
7506
7507 return true;
7508 };
7509
7510 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7511 if (IsAndZeroExtMask(N0, N1))
7512 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7513
7514 if (hasOperation(ISD::USUBSAT, VT))
7515 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7516 return V;
7517
7518 // Postpone until legalization completed to avoid interference with bswap
7519 // folding
7520 if (LegalOperations || VT.isVector())
7521 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7522 return R;
7523
7524 return SDValue();
7525}
7526
7527/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
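/// For example, for i32 a = 0xAABBCCDD:
///   ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) = 0xDD00 | 0xCC = 0xDDCC,
/// and (bswap a) >> 16 = 0xDDCCBBAA >> 16 = 0xDDCC.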
7528SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7529 bool DemandHighBits) {
7530 if (!LegalOperations)
7531 return SDValue();
7532
7533 EVT VT = N->getValueType(0);
7534 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7535 return SDValue();
7536 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7537 return SDValue();
7538
7539 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7540 bool LookPassAnd0 = false;
7541 bool LookPassAnd1 = false;
7542 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7543 std::swap(N0, N1);
7544 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7545 std::swap(N0, N1);
7546 if (N0.getOpcode() == ISD::AND) {
7547 if (!N0->hasOneUse())
7548 return SDValue();
7549 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7550 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7551 // This is needed for X86.
7552 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7553 N01C->getZExtValue() != 0xFFFF))
7554 return SDValue();
7555 N0 = N0.getOperand(0);
7556 LookPassAnd0 = true;
7557 }
7558
7559 if (N1.getOpcode() == ISD::AND) {
7560 if (!N1->hasOneUse())
7561 return SDValue();
7562 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7563 if (!N11C || N11C->getZExtValue() != 0xFF)
7564 return SDValue();
7565 N1 = N1.getOperand(0);
7566 LookPassAnd1 = true;
7567 }
7568
7569 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7570 std::swap(N0, N1);
7571 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7572 return SDValue();
7573 if (!N0->hasOneUse() || !N1->hasOneUse())
7574 return SDValue();
7575
7576 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7577 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7578 if (!N01C || !N11C)
7579 return SDValue();
7580 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7581 return SDValue();
7582
7583 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7584 SDValue N00 = N0->getOperand(0);
7585 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7586 if (!N00->hasOneUse())
7587 return SDValue();
7588 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7589 if (!N001C || N001C->getZExtValue() != 0xFF)
7590 return SDValue();
7591 N00 = N00.getOperand(0);
7592 LookPassAnd0 = true;
7593 }
7594
7595 SDValue N10 = N1->getOperand(0);
7596 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7597 if (!N10->hasOneUse())
7598 return SDValue();
7599 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7600 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7601 // for X86.
7602 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7603 N101C->getZExtValue() != 0xFFFF))
7604 return SDValue();
7605 N10 = N10.getOperand(0);
7606 LookPassAnd1 = true;
7607 }
7608
7609 if (N00 != N10)
7610 return SDValue();
7611
7612 // Make sure everything beyond the low halfword gets set to zero since the SRL
7613 // 16 will clear the top bits.
7614 unsigned OpSizeInBits = VT.getSizeInBits();
7615 if (OpSizeInBits > 16) {
7616 // If the left-shift isn't masked out then the only way this is a bswap is
7617 // if all bits beyond the low 8 are 0. In that case the entire pattern
7618 // reduces to a left shift anyway: leave it for other parts of the combiner.
7619 if (DemandHighBits && !LookPassAnd0)
7620 return SDValue();
7621
7622 // However, if the right shift isn't masked out then it might be because
7623 // it's not needed. See if we can spot that too. If the high bits aren't
7624 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7625 // upper bits to be zero.
7626 if (!LookPassAnd1) {
7627 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7628 if (!DAG.MaskedValueIsZero(N10,
7629 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7630 return SDValue();
7631 }
7632 }
7633
7634 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7635 if (OpSizeInBits > 16) {
7636 SDLoc DL(N);
7637 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7638 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7639 }
7640 return Res;
7641}
7642
7643/// Return true if the specified node is an element that makes up a 32-bit
7644/// packed halfword byteswap.
7645/// ((x & 0x000000ff) << 8) |
7646/// ((x & 0x0000ff00) >> 8) |
7647/// ((x & 0x00ff0000) << 8) |
7648/// ((x & 0xff000000) >> 8)
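/// For example, for x = 0xAABBCCDD the four terms are 0x0000DD00, 0x000000CC,
/// 0xBB000000 and 0x00AA0000, which OR together to 0xBBAADDCC, i.e. each
/// 16-bit half of x byte-swapped.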
7649 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7650 if (!N->hasOneUse())
7651 return false;
7652
7653 unsigned Opc = N.getOpcode();
7654 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7655 return false;
7656
7657 SDValue N0 = N.getOperand(0);
7658 unsigned Opc0 = N0.getOpcode();
7659 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7660 return false;
7661
7662 ConstantSDNode *N1C = nullptr;
7663 // SHL or SRL: look upstream for AND mask operand
7664 if (Opc == ISD::AND)
7665 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7666 else if (Opc0 == ISD::AND)
7667 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7668 if (!N1C)
7669 return false;
7670
7671 unsigned MaskByteOffset;
7672 switch (N1C->getZExtValue()) {
7673 default:
7674 return false;
7675 case 0xFF: MaskByteOffset = 0; break;
7676 case 0xFF00: MaskByteOffset = 1; break;
7677 case 0xFFFF:
7678 // In case demanded bits didn't clear the bits that will be shifted out.
7679 // This is needed for X86.
7680 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7681 MaskByteOffset = 1;
7682 break;
7683 }
7684 return false;
7685 case 0xFF0000: MaskByteOffset = 2; break;
7686 case 0xFF000000: MaskByteOffset = 3; break;
7687 }
7688
7689 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7690 if (Opc == ISD::AND) {
7691 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7692 // (x >> 8) & 0xff
7693 // (x >> 8) & 0xff0000
7694 if (Opc0 != ISD::SRL)
7695 return false;
7696 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7697 if (!C || C->getZExtValue() != 8)
7698 return false;
7699 } else {
7700 // (x << 8) & 0xff00
7701 // (x << 8) & 0xff000000
7702 if (Opc0 != ISD::SHL)
7703 return false;
7704 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7705 if (!C || C->getZExtValue() != 8)
7706 return false;
7707 }
7708 } else if (Opc == ISD::SHL) {
7709 // (x & 0xff) << 8
7710 // (x & 0xff0000) << 8
7711 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7712 return false;
7713 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7714 if (!C || C->getZExtValue() != 8)
7715 return false;
7716 } else { // Opc == ISD::SRL
7717 // (x & 0xff00) >> 8
7718 // (x & 0xff000000) >> 8
7719 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7720 return false;
7721 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7722 if (!C || C->getZExtValue() != 8)
7723 return false;
7724 }
7725
7726 if (Parts[MaskByteOffset])
7727 return false;
7728
7729 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7730 return true;
7731}
7732
7733// Match 2 elements of a packed halfword bswap.
7734 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7735 if (N.getOpcode() == ISD::OR)
7736 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7737 isBSwapHWordElement(N.getOperand(1), Parts);
7738
7739 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7740 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7741 if (!C || C->getAPIntValue() != 16)
7742 return false;
7743 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7744 return true;
7745 }
7746
7747 return false;
7748}
7749
7750// Match this pattern:
7751// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7752// And rewrite this to:
7753// (rotr (bswap A), 16)
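// For example, for A = 0xAABBCCDD: ((A << 8) & 0xff00ff00) = 0xBB00DD00 and
// ((A >> 8) & 0x00ff00ff) = 0x00AA00CC, which OR to 0xBBAADDCC; and
// rotr(bswap(A), 16) = rotr(0xDDCCBBAA, 16) = 0xBBAADDCC as well.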
7754 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7755 SelectionDAG &DAG, SDNode *N, SDValue N0,
7756 SDValue N1, EVT VT) {
7757 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7758 "MatchBSwapHWordOrAndAnd: expecting i32");
7759 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7760 return SDValue();
7761 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7762 return SDValue();
7763 // TODO: this is too restrictive; lifting this restriction requires more tests
7764 if (!N0->hasOneUse() || !N1->hasOneUse())
7765 return SDValue();
7766 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7767 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7768 if (!Mask0 || !Mask1)
7769 return SDValue();
7770 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7771 Mask1->getAPIntValue() != 0x00ff00ff)
7772 return SDValue();
7773 SDValue Shift0 = N0.getOperand(0);
7774 SDValue Shift1 = N1.getOperand(0);
7775 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7776 return SDValue();
7777 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7778 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7779 if (!ShiftAmt0 || !ShiftAmt1)
7780 return SDValue();
7781 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7782 return SDValue();
7783 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7784 return SDValue();
7785
7786 SDLoc DL(N);
7787 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7788 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7789 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7790}
7791
7792/// Match a 32-bit packed halfword bswap. That is
7793/// ((x & 0x000000ff) << 8) |
7794/// ((x & 0x0000ff00) >> 8) |
7795/// ((x & 0x00ff0000) << 8) |
7796/// ((x & 0xff000000) >> 8)
7797/// => (rotl (bswap x), 16)
7798SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7799 if (!LegalOperations)
7800 return SDValue();
7801
7802 EVT VT = N->getValueType(0);
7803 if (VT != MVT::i32)
7804 return SDValue();
7805 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7806 return SDValue();
7807
7808 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7809 return BSwap;
7810
7811 // Try again with commuted operands.
7812 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7813 return BSwap;
7814
7815
7816 // Look for either
7817 // (or (bswaphpair), (bswaphpair))
7818 // (or (or (bswaphpair), (and)), (and))
7819 // (or (or (and), (bswaphpair)), (and))
7820 SDNode *Parts[4] = {};
7821
7822 if (isBSwapHWordPair(N0, Parts)) {
7823 // (or (or (and), (and)), (or (and), (and)))
7824 if (!isBSwapHWordPair(N1, Parts))
7825 return SDValue();
7826 } else if (N0.getOpcode() == ISD::OR) {
7827 // (or (or (or (and), (and)), (and)), (and))
7828 if (!isBSwapHWordElement(N1, Parts))
7829 return SDValue();
7830 SDValue N00 = N0.getOperand(0);
7831 SDValue N01 = N0.getOperand(1);
7832 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7833 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7834 return SDValue();
7835 } else {
7836 return SDValue();
7837 }
7838
7839 // Make sure the parts are all coming from the same node.
7840 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7841 return SDValue();
7842
7843 SDLoc DL(N);
7844 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7845 SDValue(Parts[0], 0));
7846
7847 // Result of the bswap should be rotated by 16. If it's not legal, then
7848 // do (x << 16) | (x >> 16).
7849 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7850 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7851 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7852 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7853 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7854 return DAG.getNode(ISD::OR, DL, VT,
7855 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7856 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7857}
7858
7859/// This contains all DAGCombine rules which reduce two values combined by
7860/// an Or operation to a single value \see visitANDLike().
7861SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7862 EVT VT = N1.getValueType();
7863
7864 // fold (or x, undef) -> -1
7865 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7866 return DAG.getAllOnesConstant(DL, VT);
7867
7868 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7869 return V;
7870
7871 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7872 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7873 // Don't increase # computations.
7874 (N0->hasOneUse() || N1->hasOneUse())) {
7875 // We can only do this xform if we know that bits from X that are set in C2
7876 // but not in C1 are already zero. Likewise for Y.
7877 if (const ConstantSDNode *N0O1C =
7878 getAsNonOpaqueConstant(N0.getOperand(1)))
7879 if (const ConstantSDNode *N1O1C =
7880 getAsNonOpaqueConstant(N1.getOperand(1))) {
7881 // We can only do this xform if we know that bits from X that are set in
7882 // C2 but not in C1 are already zero. Likewise for Y.
7883 const APInt &LHSMask = N0O1C->getAPIntValue();
7884 const APInt &RHSMask = N1O1C->getAPIntValue();
7885
7886 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7887 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7888 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7889 N0.getOperand(0), N1.getOperand(0));
7890 return DAG.getNode(ISD::AND, DL, VT, X,
7891 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7892 }
7893 }
7894 }
7895 }
7896
7897 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7898 if (N0.getOpcode() == ISD::AND &&
7899 N1.getOpcode() == ISD::AND &&
7900 N0.getOperand(0) == N1.getOperand(0) &&
7901 // Don't increase # computations.
7902 (N0->hasOneUse() || N1->hasOneUse())) {
7903 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7904 N0.getOperand(1), N1.getOperand(1));
7905 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7906 }
7907
7908 return SDValue();
7909}
7910
7911/// OR combines for which the commuted variant will be tried as well.
7912static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7913 SDNode *N) {
7914 EVT VT = N0.getValueType();
7915 unsigned BW = VT.getScalarSizeInBits();
7916 SDLoc DL(N);
7917
7918 auto peekThroughResize = [](SDValue V) {
7919 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7920 return V->getOperand(0);
7921 return V;
7922 };
7923
7924 SDValue N0Resized = peekThroughResize(N0);
7925 if (N0Resized.getOpcode() == ISD::AND) {
7926 SDValue N1Resized = peekThroughResize(N1);
7927 SDValue N00 = N0Resized.getOperand(0);
7928 SDValue N01 = N0Resized.getOperand(1);
7929
7930 // fold or (and x, y), x --> x
7931 if (N00 == N1Resized || N01 == N1Resized)
7932 return N1;
7933
7934 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7935 // TODO: Set AllowUndefs = true.
7936 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7937 /* AllowUndefs */ false)) {
7938 if (peekThroughResize(NotOperand) == N1Resized)
7939 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7940 N1);
7941 }
7942
7943 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7944 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7945 /* AllowUndefs */ false)) {
7946 if (peekThroughResize(NotOperand) == N1Resized)
7947 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7948 N1);
7949 }
7950 }
7951
7952 SDValue X, Y;
7953
7954 // fold or (xor X, N1), N1 --> or X, N1
7955 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7956 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7957
7958 // fold or (xor x, y), (x and/or y) --> or x, y
7959 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7960 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7961 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7962 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7963
7964 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7965 return R;
7966
7967 auto peekThroughZext = [](SDValue V) {
7968 if (V->getOpcode() == ISD::ZERO_EXTEND)
7969 return V->getOperand(0);
7970 return V;
7971 };
7972
7973 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7974 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7975 N0.getOperand(0) == N1.getOperand(0) &&
7976 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7977 return N0;
7978
7979 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7980 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7981 N0.getOperand(1) == N1.getOperand(0) &&
7982 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7983 return N0;
7984
7985 // Attempt to match a legalized build_pair-esque pattern:
7986 // or(shl(aext(Hi),BW/2),zext(Lo))
7987 SDValue Lo, Hi;
7988 if (sd_match(N0,
7989 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7990 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7991 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7992 Lo.getValueType() == Hi.getValueType()) {
7993 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7994 SDValue NotLo, NotHi;
7995 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7996 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7997 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7998 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7999 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8000 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8001 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8002 }
8003 }
8004
8005 return SDValue();
8006}
8007
8008SDValue DAGCombiner::visitOR(SDNode *N) {
8009 SDValue N0 = N->getOperand(0);
8010 SDValue N1 = N->getOperand(1);
8011 EVT VT = N1.getValueType();
8012 SDLoc DL(N);
8013
8014 // x | x --> x
8015 if (N0 == N1)
8016 return N0;
8017
8018 // fold (or c1, c2) -> c1|c2
8019 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8020 return C;
8021
8022 // canonicalize constant to RHS
8023 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8024 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8025 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8026
8027 // fold vector ops
8028 if (VT.isVector()) {
8029 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8030 return FoldedVOp;
8031
8032 // fold (or x, 0) -> x, vector edition
8033 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8034 return N0;
8035
8036 // fold (or x, -1) -> -1, vector edition
8037 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8038 // do not return N1, because undef node may exist in N1
8039 return DAG.getAllOnesConstant(DL, N1.getValueType());
8040
8041 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8042 // Do this only if the resulting type / shuffle is legal.
8043 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8044 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8045 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8046 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8047 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8048 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8049 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8050 // Ensure both shuffles have a zero input.
8051 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8052 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8053 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8054 bool CanFold = true;
8055 int NumElts = VT.getVectorNumElements();
8056 SmallVector<int, 4> Mask(NumElts, -1);
8057
8058 for (int i = 0; i != NumElts; ++i) {
8059 int M0 = SV0->getMaskElt(i);
8060 int M1 = SV1->getMaskElt(i);
8061
8062 // Determine if either index is pointing to a zero vector.
8063 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8064 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8065
8066 // If one element is zero and the other side is undef, keep undef.
8067 // This also handles the case that both are undef.
8068 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8069 continue;
8070
8071 // Make sure only one of the elements is zero.
8072 if (M0Zero == M1Zero) {
8073 CanFold = false;
8074 break;
8075 }
8076
8077 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8078
8079 // We have a zero and non-zero element. If the non-zero came from
8080 // SV0 make the index a LHS index. If it came from SV1, make it
8081 // a RHS index. We need to mod by NumElts because we don't care
8082 // which operand it came from in the original shuffles.
8083 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8084 }
8085
8086 if (CanFold) {
8087 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8088 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8089 SDValue LegalShuffle =
8090 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8091 if (LegalShuffle)
8092 return LegalShuffle;
8093 }
8094 }
8095 }
8096 }
8097
8098 // fold (or x, 0) -> x
8099 if (isNullConstant(N1))
8100 return N0;
8101
8102 // fold (or x, -1) -> -1
8103 if (isAllOnesConstant(N1))
8104 return N1;
8105
8106 if (SDValue NewSel = foldBinOpIntoSelect(N))
8107 return NewSel;
8108
8109 // fold (or x, c) -> c iff (x & ~c) == 0
8110 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8111 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8112 return N1;
8113
8114 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8115 return R;
8116
8117 if (SDValue Combined = visitORLike(N0, N1, DL))
8118 return Combined;
8119
8120 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8121 return Combined;
8122
8123 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8124 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8125 return BSwap;
8126 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8127 return BSwap;
8128
8129 // reassociate or
8130 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8131 return ROR;
8132
8133 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8134 if (SDValue SD =
8135 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8136 return SD;
8137
8138 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8139 // iff (c1 & c2) != 0 or c1/c2 are undef.
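// For example, (or (and X, 0xF0), 0x30) becomes (and (or X, 0x30), 0xF0):
// bits 4-5 are forced to one either way, bits 6-7 still come from X, and the
// low nibble stays zero.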
8140 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8141 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8142 };
8143 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8144 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8145 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8146 {N1, N0.getOperand(1)})) {
8147 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8148 AddToWorklist(IOR.getNode());
8149 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8150 }
8151 }
8152
8153 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8154 return Combined;
8155 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8156 return Combined;
8157
8158 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8159 if (N0.getOpcode() == N1.getOpcode())
8160 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8161 return V;
8162
8163 // See if this is some rotate idiom.
8164 if (SDValue Rot = MatchRotate(N0, N1, DL))
8165 return Rot;
8166
8167 if (SDValue Load = MatchLoadCombine(N))
8168 return Load;
8169
8170 // Simplify the operands using demanded-bits information.
8171 if (SimplifyDemandedBits(SDValue(N, 0)))
8172 return SDValue(N, 0);
8173
8174 // If OR can be rewritten into ADD, try combines based on ADD.
8175 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8176 DAG.isADDLike(SDValue(N, 0)))
8177 if (SDValue Combined = visitADDLike(N))
8178 return Combined;
8179
8180 // Postpone until legalization completed to avoid interference with bswap
8181 // folding
8182 if (LegalOperations || VT.isVector())
8183 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8184 return R;
8185
8186 return SDValue();
8187}
8188
8189static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8190 SDValue &Mask) {
8191 if (Op.getOpcode() == ISD::AND &&
8192 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8193 Mask = Op.getOperand(1);
8194 return Op.getOperand(0);
8195 }
8196 return Op;
8197}
8198
8199/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8200static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8201 SDValue &Mask) {
8202 Op = stripConstantMask(DAG, Op, Mask);
8203 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8204 Shift = Op;
8205 return true;
8206 }
8207 return false;
8208}
8209
8210/// Helper function for visitOR to extract the needed side of a rotate idiom
8211/// from a shl/srl/mul/udiv. This is meant to handle cases where
8212/// InstCombine merged some outside op with one of the shifts from
8213/// the rotate pattern.
8214/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8215/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8216/// patterns:
8217///
8218/// (or (add v v) (shrl v bitwidth-1)):
8219/// expands (add v v) -> (shl v 1)
8220///
8221/// (or (mul v c0) (shrl (mul v c1) c2)):
8222/// expands (mul v c0) -> (shl (mul v c1) c3)
8223///
8224/// (or (udiv v c0) (shl (udiv v c1) c2)):
8225/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8226///
8227/// (or (shl v c0) (shrl (shl v c1) c2)):
8228/// expands (shl v c0) -> (shl (shl v c1) c3)
8229///
8230/// (or (shrl v c0) (shl (shrl v c1) c2)):
8231/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8232///
8233/// Such that in all cases, c3+c2==bitwidth(op v c1).
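///
/// For example, for i32 (or (mul v c0=16) (srl (mul v c1=2) c2=29)): the
/// needed amount is c3 = 32 - 29 = 3 and c0 == c1 * (1 << c3), so
/// (mul v 16) is expanded to (shl (mul v 2) 3), exposing a rotate of
/// (mul v 2) by 3.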
8234static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8235 SDValue ExtractFrom, SDValue &Mask,
8236 const SDLoc &DL) {
8237 assert(OppShift && ExtractFrom && "Empty SDValue");
8238 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8239 return SDValue();
8240
8241 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8242
8243 // Value and Type of the shift.
8244 SDValue OppShiftLHS = OppShift.getOperand(0);
8245 EVT ShiftedVT = OppShiftLHS.getValueType();
8246
8247 // Amount of the existing shift.
8248 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8249
8250 // (add v v) -> (shl v 1)
8251 // TODO: Should this be a general DAG canonicalization?
8252 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8253 ExtractFrom.getOpcode() == ISD::ADD &&
8254 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8255 ExtractFrom.getOperand(0) == OppShiftLHS &&
8256 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8257 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8258 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8259
8260 // Preconditions:
8261 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8262 //
8263 // Find opcode of the needed shift to be extracted from (op0 v c0).
8264 unsigned Opcode = ISD::DELETED_NODE;
8265 bool IsMulOrDiv = false;
8266 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8267 // opcode or its arithmetic (mul or udiv) variant.
8268 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8269 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8270 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8271 return false;
8272 Opcode = NeededShift;
8273 return true;
8274 };
8275 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8276 // that the needed shift can be extracted from.
8277 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8278 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8279 return SDValue();
8280
8281 // op0 must be the same opcode on both sides, have the same LHS argument,
8282 // and produce the same value type.
8283 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8284 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8285 ShiftedVT != ExtractFrom.getValueType())
8286 return SDValue();
8287
8288 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8289 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8290 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8291 ConstantSDNode *ExtractFromCst =
8292 isConstOrConstSplat(ExtractFrom.getOperand(1));
8293 // TODO: We should be able to handle non-uniform constant vectors for these values
8294 // Check that we have constant values.
8295 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8296 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8297 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8298 return SDValue();
8299
8300 // Compute the shift amount we need to extract to complete the rotate.
8301 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8302 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8303 return SDValue();
8304 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8305 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8306 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8307 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8308 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8309
8310 // Now try extract the needed shift from the ExtractFrom op and see if the
8311 // result matches up with the existing shift's LHS op.
8312 if (IsMulOrDiv) {
8313 // Op to extract from is a mul or udiv by a constant.
8314 // Check:
8315 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8316 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8317 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8318 NeededShiftAmt.getZExtValue());
8319 APInt ResultAmt;
8320 APInt Rem;
8321 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8322 if (Rem != 0 || ResultAmt != OppLHSAmt)
8323 return SDValue();
8324 } else {
8325 // Op to extract from is a shift by a constant.
8326 // Check:
8327 // c2 - (bitwidth(op0 v c0) - c1) == c0
8328 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8329 ExtractFromAmt.getBitWidth()))
8330 return SDValue();
8331 }
8332
8333 // Return the expanded shift op that should allow a rotate to be formed.
8334 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8335 EVT ResVT = ExtractFrom.getValueType();
8336 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8337 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8338}
8339
8340// Return true if we can prove that, whenever Neg and Pos are both in the
8341// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8342// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8343//
8344// (or (shift1 X, Neg), (shift2 X, Pos))
8345//
8346// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8347// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8348// to consider shift amounts with defined behavior.
8349//
8350// The IsRotate flag should be set when the LHS of both shifts is the same.
8351// Otherwise if matching a general funnel shift, it should be clear.
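//
// For example, with EltSize == 32 the typical rotate idiom has Pos == y and
// Neg == (sub 32, y), which satisfies condition [B] below; a masked variant
// such as Neg == (and (sub 32, y), 31) satisfies the weaker condition [A].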
8352static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8353 SelectionDAG &DAG, bool IsRotate) {
8354 const auto &TLI = DAG.getTargetLoweringInfo();
8355 // If EltSize is a power of 2 then:
8356 //
8357 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8358 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8359 //
8360 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8361 // for the stronger condition:
8362 //
8363 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8364 //
8365 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8366 // we can just replace Neg with Neg' for the rest of the function.
8367 //
8368 // In other cases we check for the even stronger condition:
8369 //
8370 // Neg == EltSize - Pos [B]
8371 //
8372 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8373 // behavior if Pos == 0 (and consequently Neg == EltSize).
8374 //
8375 // We could actually use [A] whenever EltSize is a power of 2, but the
8376 // only extra cases that it would match are those uninteresting ones
8377 // where Neg and Pos are never in range at the same time. E.g. for
8378 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8379 // as well as (sub 32, Pos), but:
8380 //
8381 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8382 //
8383 // always invokes undefined behavior for 32-bit X.
8384 //
8385 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8386 // This allows us to peek through any operations that only affect Mask's
8387 // un-demanded bits.
8388 //
8389 // NOTE: We can only do this when matching operations which won't modify the
8390 // least Log2(EltSize) significant bits and not a general funnel shift.
8391 unsigned MaskLoBits = 0;
8392 if (IsRotate && isPowerOf2_64(EltSize)) {
8393 unsigned Bits = Log2_64(EltSize);
8394 unsigned NegBits = Neg.getScalarValueSizeInBits();
8395 if (NegBits >= Bits) {
8396 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8397 if (SDValue Inner =
8398 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8399 Neg = Inner;
8400 MaskLoBits = Bits;
8401 }
8402 }
8403 }
8404
8405 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8406 if (Neg.getOpcode() != ISD::SUB)
8407 return false;
8408 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8409 if (!NegC)
8410 return false;
8411 SDValue NegOp1 = Neg.getOperand(1);
8412
8413 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8414 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8415 // are redundant for the purpose of the equality.
8416 if (MaskLoBits) {
8417 unsigned PosBits = Pos.getScalarValueSizeInBits();
8418 if (PosBits >= MaskLoBits) {
8419 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8420 if (SDValue Inner =
8421 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8422 Pos = Inner;
8423 }
8424 }
8425 }
8426
8427 // The condition we need is now:
8428 //
8429 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8430 //
8431 // If NegOp1 == Pos then we need:
8432 //
8433 // EltSize & Mask == NegC & Mask
8434 //
8435 // (because "x & Mask" is a truncation and distributes through subtraction).
8436 //
8437 // We also need to account for a potential truncation of NegOp1 if the amount
8438 // has already been legalized to a shift amount type.
8439 APInt Width;
8440 if ((Pos == NegOp1) ||
8441 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8442 Width = NegC->getAPIntValue();
8443
8444 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8445 // Then the condition we want to prove becomes:
8446 //
8447 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8448 //
8449 // which, again because "x & Mask" is a truncation, becomes:
8450 //
8451 // NegC & Mask == (EltSize - PosC) & Mask
8452 // EltSize & Mask == (NegC + PosC) & Mask
8453 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8454 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8455 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8456 else
8457 return false;
8458 } else
8459 return false;
8460
8461 // Now we just need to check that EltSize & Mask == Width & Mask.
8462 if (MaskLoBits)
8463 // EltSize & Mask is 0 since Mask is EltSize - 1.
8464 return Width.getLoBits(MaskLoBits) == 0;
8465 return Width == EltSize;
8466}
8467
8468// A subroutine of MatchRotate used once we have found an OR of two opposite
8469// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8470// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8471// former being preferred if supported. InnerPos and InnerNeg are Pos and
8472// Neg with outer conversions stripped away.
8473SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8474 SDValue Neg, SDValue InnerPos,
8475 SDValue InnerNeg, bool HasPos,
8476 unsigned PosOpcode, unsigned NegOpcode,
8477 const SDLoc &DL) {
8478 // fold (or (shl x, (*ext y)),
8479 // (srl x, (*ext (sub 32, y)))) ->
8480 // (rotl x, y) or (rotr x, (sub 32, y))
8481 //
8482 // fold (or (shl x, (*ext (sub 32, y))),
8483 // (srl x, (*ext y))) ->
8484 // (rotr x, y) or (rotl x, (sub 32, y))
8485 EVT VT = Shifted.getValueType();
8486 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8487 /*IsRotate*/ true)) {
8488 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8489 HasPos ? Pos : Neg);
8490 }
8491
8492 return SDValue();
8493}
8494
8495// A subroutine of MatchRotate used once we have found an OR of two opposite
8496// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8497// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8498// former being preferred if supported. InnerPos and InnerNeg are Pos and
8499// Neg with outer conversions stripped away.
8500// TODO: Merge with MatchRotatePosNeg.
8501SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8502 SDValue Neg, SDValue InnerPos,
8503 SDValue InnerNeg, bool HasPos,
8504 unsigned PosOpcode, unsigned NegOpcode,
8505 const SDLoc &DL) {
8506 EVT VT = N0.getValueType();
8507 unsigned EltBits = VT.getScalarSizeInBits();
8508
8509 // fold (or (shl x0, (*ext y)),
8510 // (srl x1, (*ext (sub 32, y)))) ->
8511 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8512 //
8513 // fold (or (shl x0, (*ext (sub 32, y))),
8514 // (srl x1, (*ext y))) ->
8515 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8516 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8517 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8518 HasPos ? Pos : Neg);
8519 }
8520
8521 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8522 // so for now just use the PosOpcode case if its legal.
8523 // TODO: When can we use the NegOpcode case?
8524 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8525 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8526 if (Op.getOpcode() != BinOpc)
8527 return false;
8528 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8529 return Cst && (Cst->getAPIntValue() == Imm);
8530 };
8531
8532 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8533 // -> (fshl x0, x1, y)
8534 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8535 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8536 InnerPos == InnerNeg.getOperand(0) &&
8537 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8538 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8539 }
8540
8541 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8542 // -> (fshr x0, x1, y)
8543 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8544 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8545 InnerNeg == InnerPos.getOperand(0) &&
8546 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8547 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8548 }
8549
8550 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8551 // -> (fshr x0, x1, y)
8552 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8553 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8554 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8555 InnerNeg == InnerPos.getOperand(0) &&
8556 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8557 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8558 }
8559 }
8560
8561 return SDValue();
8562}
8563
8564// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8565// idioms for rotate, and if the target supports rotation instructions, generate
8566// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8567// with different shifted sources.
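// For example, (or (shl x, C), (srl x, 32 - C)) on an i32 x can be matched as
// (rotl x, C), and the same pattern with two different shifted values can be
// matched as a funnel shift.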
8568SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8569 EVT VT = LHS.getValueType();
8570
8571 // The target must have at least one rotate/funnel flavor.
8572 // We still try to match rotate by constant pre-legalization.
8573 // TODO: Support pre-legalization funnel-shift by constant.
8574 bool HasROTL = hasOperation(ISD::ROTL, VT);
8575 bool HasROTR = hasOperation(ISD::ROTR, VT);
8576 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8577 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8578
8579 // If the type is going to be promoted and the target has enabled custom
8580 // lowering for rotate, allow matching rotate by non-constants. Only allow
8581 // this for scalar types.
8582 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8583 TargetLowering::TypePromoteInteger) {
8584 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8585 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8586 }
8587
8588 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8589 return SDValue();
8590
8591 // Check for truncated rotate.
8592 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8593 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8594 assert(LHS.getValueType() == RHS.getValueType());
8595 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8596 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8597 }
8598 }
8599
8600 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8601 SDValue LHSShift; // The shift.
8602 SDValue LHSMask; // AND value if any.
8603 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8604
8605 SDValue RHSShift; // The shift.
8606 SDValue RHSMask; // AND value if any.
8607 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8608
8609 // If neither side matched a rotate half, bail
8610 if (!LHSShift && !RHSShift)
8611 return SDValue();
8612
8613 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8614 // side of the rotate, so try to handle that here. In all cases we need to
8615 // pass the matched shift from the opposite side to compute the opcode and
8616 // needed shift amount to extract. We still want to do this if both sides
8617 // matched a rotate half because one half may be a potential overshift that
8618 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8619 // single one).
8620
8621 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8622 if (LHSShift)
8623 if (SDValue NewRHSShift =
8624 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8625 RHSShift = NewRHSShift;
8626 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8627 if (RHSShift)
8628 if (SDValue NewLHSShift =
8629 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8630 LHSShift = NewLHSShift;
8631
8632 // If a side is still missing, nothing else we can do.
8633 if (!RHSShift || !LHSShift)
8634 return SDValue();
8635
8636 // At this point we've matched or extracted a shift op on each side.
8637
8638 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8639 return SDValue(); // Shifts must disagree.
8640
8641 // Canonicalize shl to left side in a shl/srl pair.
8642 if (RHSShift.getOpcode() == ISD::SHL) {
8643 std::swap(LHS, RHS);
8644 std::swap(LHSShift, RHSShift);
8645 std::swap(LHSMask, RHSMask);
8646 }
8647
8648 // Something has gone wrong - we've lost the shl/srl pair - bail.
8649 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8650 return SDValue();
8651
8652 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8653 SDValue LHSShiftArg = LHSShift.getOperand(0);
8654 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8655 SDValue RHSShiftArg = RHSShift.getOperand(0);
8656 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8657
8658 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8659 ConstantSDNode *RHS) {
8660 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8661 };
8662
8663 auto ApplyMasks = [&](SDValue Res) {
8664 // If there is an AND of either shifted operand, apply it to the result.
8665 if (LHSMask.getNode() || RHSMask.getNode()) {
8666 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8667 SDValue Mask = AllOnes;
8668
8669 if (LHSMask.getNode()) {
8670 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8671 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8672 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8673 }
8674 if (RHSMask.getNode()) {
8675 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8676 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8677 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8678 }
8679
8680 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8681 }
8682
8683 return Res;
8684 };
8685
8686 // TODO: Support pre-legalization funnel-shift by constant.
8687 bool IsRotate = LHSShiftArg == RHSShiftArg;
8688 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8689 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8690 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8691 // Look for a disguised rotate by constant.
8692 // The common shifted operand X may be hidden inside another 'or'.
8693 SDValue X, Y;
8694 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8695 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8696 return false;
8697 if (CommonOp == Or.getOperand(0)) {
8698 X = CommonOp;
8699 Y = Or.getOperand(1);
8700 return true;
8701 }
8702 if (CommonOp == Or.getOperand(1)) {
8703 X = CommonOp;
8704 Y = Or.getOperand(0);
8705 return true;
8706 }
8707 return false;
8708 };
8709
8710 SDValue Res;
8711 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8712 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8713 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8714 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8715 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8716 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8717 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8718 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8719 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8720 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8721 } else {
8722 return SDValue();
8723 }
8724
8725 return ApplyMasks(Res);
8726 }
8727
8728 return SDValue(); // Requires funnel shift support.
8729 }
8730
8731 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8732 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8733 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8734 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8735 // iff C1+C2 == EltSizeInBits
8736 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8737 SDValue Res;
8738 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8739 bool UseROTL = !LegalOperations || HasROTL;
8740 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8741 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8742 } else {
8743 bool UseFSHL = !LegalOperations || HasFSHL;
8744 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8745 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8746 }
8747
8748 return ApplyMasks(Res);
8749 }
8750
8751 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8752 // shift.
8753 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8754 return SDValue();
8755
8756 // If there is a mask here, and we have a variable shift, we can't be sure
8757 // that we're masking out the right stuff.
8758 if (LHSMask.getNode() || RHSMask.getNode())
8759 return SDValue();
8760
8761 // If the shift amount is sign/zext/any-extended just peel it off.
8762 SDValue LExtOp0 = LHSShiftAmt;
8763 SDValue RExtOp0 = RHSShiftAmt;
8764 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8765 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8766 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8767 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8768 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8769 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8770 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8771 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8772 LExtOp0 = LHSShiftAmt.getOperand(0);
8773 RExtOp0 = RHSShiftAmt.getOperand(0);
8774 }
8775
8776 if (IsRotate && (HasROTL || HasROTR)) {
8777 SDValue TryL =
8778 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8779 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8780 if (TryL)
8781 return TryL;
8782
8783 SDValue TryR =
8784 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8785 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8786 if (TryR)
8787 return TryR;
8788 }
8789
8790 SDValue TryL =
8791 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8792 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8793 if (TryL)
8794 return TryL;
8795
8796 SDValue TryR =
8797 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8798 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8799 if (TryR)
8800 return TryR;
8801
8802 return SDValue();
8803}
8804
8805/// Recursively traverses the expression calculating the origin of the requested
8806/// byte of the given value. Returns std::nullopt if the provider can't be
8807/// calculated.
8808///
8809/// For all the values except the root of the expression, we verify that the
8810/// value has exactly one use and if not then return std::nullopt. This way if
8811/// the origin of the byte is returned it's guaranteed that the values which
8812/// contribute to the byte are not used outside of this expression.
8813
8814/// However, there is a special case when dealing with vector loads -- we allow
8815/// more than one use if the load is a vector type. Since the values that
8816/// contribute to the byte ultimately come from the ExtractVectorElements of the
8817/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8818/// because those operations are independent from the pattern to be combined.
8819/// For vector loads, we simply care that the ByteProviders are adjacent
8820/// positions of the same vector, and their index matches the byte that is being
8821/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8822/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8823/// byte position we are trying to provide for the LoadCombine. If these do
8824/// not match, then we can not combine the vector loads. \p Index uses the
8825/// byte position we are trying to provide for and is matched against the
8826/// shl and load size. The \p Index algorithm ensures the requested byte is
8827/// provided for by the pattern, and the pattern does not over provide bytes.
8828///
8829///
8830/// The supported LoadCombine pattern for vector loads is as follows
8831/// or
8832/// / \
8833/// or shl
8834/// / \ |
8835/// or shl zext
8836/// / \ | |
8837/// shl zext zext EVE*
8838/// | | | |
8839/// zext EVE* EVE* LOAD
8840/// | | |
8841/// EVE* LOAD LOAD
8842/// |
8843/// LOAD
8844///
8845/// *ExtractVectorElement
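///
/// As a scalar illustration, requesting byte 2 of an i32 (shl x, 16) is
/// forwarded as a request for byte 0 of x, while requesting byte 0 or 1 of
/// the same node yields a constant-zero provider.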
8846using SDByteProvider = ByteProvider<SDNode *>;
8847
8848static std::optional<SDByteProvider>
8849calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8850 std::optional<uint64_t> VectorIndex,
8851 unsigned StartingIndex = 0) {
8852
8853 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8854 if (Depth == 10)
8855 return std::nullopt;
8856
8857 // Only allow multiple uses if the instruction is a vector load (in which
8858 // case we will use the load for every ExtractVectorElement)
8859 if (Depth && !Op.hasOneUse() &&
8860 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8861 return std::nullopt;
8862
8863 // Fail to combine if we have encountered anything but a LOAD after handling
8864 // an ExtractVectorElement.
8865 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8866 return std::nullopt;
8867
8868 unsigned BitWidth = Op.getValueSizeInBits();
8869 if (BitWidth % 8 != 0)
8870 return std::nullopt;
8871 unsigned ByteWidth = BitWidth / 8;
8872 assert(Index < ByteWidth && "invalid index requested");
8873 (void) ByteWidth;
8874
8875 switch (Op.getOpcode()) {
8876 case ISD::OR: {
8877 auto LHS =
8878 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8879 if (!LHS)
8880 return std::nullopt;
8881 auto RHS =
8882 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8883 if (!RHS)
8884 return std::nullopt;
8885
8886 if (LHS->isConstantZero())
8887 return RHS;
8888 if (RHS->isConstantZero())
8889 return LHS;
8890 return std::nullopt;
8891 }
8892 case ISD::SHL: {
8893 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8894 if (!ShiftOp)
8895 return std::nullopt;
8896
8897 uint64_t BitShift = ShiftOp->getZExtValue();
8898
8899 if (BitShift % 8 != 0)
8900 return std::nullopt;
8901 uint64_t ByteShift = BitShift / 8;
8902
8903 // If we are shifting by an amount greater than the index we are trying to
8904 // provide, then do not provide anything. Otherwise, subtract the index by
8905 // the amount we shifted by.
8906 return Index < ByteShift
8907 ? std::optional<SDByteProvider>(SDByteProvider::getConstantZero())
8908 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8909 Depth + 1, VectorIndex, Index);
8910 }
8911 case ISD::ANY_EXTEND:
8912 case ISD::SIGN_EXTEND:
8913 case ISD::ZERO_EXTEND: {
8914 SDValue NarrowOp = Op->getOperand(0);
8915 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8916 if (NarrowBitWidth % 8 != 0)
8917 return std::nullopt;
8918 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8919
8920 if (Index >= NarrowByteWidth)
8921 return Op.getOpcode() == ISD::ZERO_EXTEND
8922 ? std::optional<SDByteProvider>(
8923 SDByteProvider::getConstantZero())
8924 : std::nullopt;
8925 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8926 StartingIndex);
8927 }
8928 case ISD::BSWAP:
8929 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8930 Depth + 1, VectorIndex, StartingIndex);
8931 case ISD::EXTRACT_VECTOR_ELT: {
8932 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8933 if (!OffsetOp)
8934 return std::nullopt;
8935
8936 VectorIndex = OffsetOp->getZExtValue();
8937
8938 SDValue NarrowOp = Op->getOperand(0);
8939 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8940 if (NarrowBitWidth % 8 != 0)
8941 return std::nullopt;
8942 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8943 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8944 // type, leaving the high bits undefined.
8945 if (Index >= NarrowByteWidth)
8946 return std::nullopt;
8947
8948 // Check to see if the position of the element in the vector corresponds
8949 // with the byte we are trying to provide for. In the case of a vector of
8950 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8951 // the element will provide a range of bytes. For example, if we have a
8952 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8953 // 3).
8954 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8955 return std::nullopt;
8956 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8957 return std::nullopt;
8958
8959 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8960 VectorIndex, StartingIndex);
8961 }
8962 case ISD::LOAD: {
8963 auto L = cast<LoadSDNode>(Op.getNode());
8964 if (!L->isSimple() || L->isIndexed())
8965 return std::nullopt;
8966
8967 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8968 if (NarrowBitWidth % 8 != 0)
8969 return std::nullopt;
8970 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8971
8972 // If the width of the load does not reach the byte we are trying to
8973 // provide for and it is not a ZEXTLOAD, then the load does not provide
8974 // the byte in question.
8975 if (Index >= NarrowByteWidth)
8976 return L->getExtensionType() == ISD::ZEXTLOAD
8977 ? std::optional<SDByteProvider>(
8978 SDByteProvider::getConstantZero())
8979 : std::nullopt;
8980
8981 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8982 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8983 }
8984 }
8985
8986 return std::nullopt;
8987}
8988
8989static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8990 return i;
8991}
8992
8993static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8994 return BW - i - 1;
8995}
8996
8997// Check if the bytes offsets we are looking at match with either big or
8998// little endian value loaded. Return true for big endian, false for little
8999// endian, and std::nullopt if match failed.
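// For example, offsets {0, 1, 2, 3} relative to FirstOffset match a little
// endian layout (returns false), while {3, 2, 1, 0} match a big endian
// layout (returns true).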
9000static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9001 int64_t FirstOffset) {
9002 // The endian can be decided only when it is 2 bytes at least.
9003 unsigned Width = ByteOffsets.size();
9004 if (Width < 2)
9005 return std::nullopt;
9006
9007 bool BigEndian = true, LittleEndian = true;
9008 for (unsigned i = 0; i < Width; i++) {
9009 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9010 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9011 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9012 if (!BigEndian && !LittleEndian)
9013 return std::nullopt;
9014 }
9015
9016 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9017 "little endian");
9018 return BigEndian;
9019}
9020
9021// Look through one layer of truncate or extend.
9022static SDValue stripTruncAndExt(SDValue Value) {
9023 switch (Value.getOpcode()) {
9024 case ISD::TRUNCATE:
9025 case ISD::ZERO_EXTEND:
9026 case ISD::SIGN_EXTEND:
9027 case ISD::ANY_EXTEND:
9028 return Value.getOperand(0);
9029 }
9030 return SDValue();
9031}
9032
9033/// Match a pattern where a wide type scalar value is stored by several narrow
9034/// stores. Fold it into a single store or a BSWAP and a store if the target
9035/// supports it.
9036///
9037/// Assuming little endian target:
9038/// i8 *p = ...
9039/// i32 val = ...
9040/// p[0] = (val >> 0) & 0xFF;
9041/// p[1] = (val >> 8) & 0xFF;
9042/// p[2] = (val >> 16) & 0xFF;
9043/// p[3] = (val >> 24) & 0xFF;
9044/// =>
9045/// *((i32)p) = val;
9046///
9047/// i8 *p = ...
9048/// i32 val = ...
9049/// p[0] = (val >> 24) & 0xFF;
9050/// p[1] = (val >> 16) & 0xFF;
9051/// p[2] = (val >> 8) & 0xFF;
9052/// p[3] = (val >> 0) & 0xFF;
9053/// =>
9054/// *((i32)p) = BSWAP(val);
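///
/// A pair of half-width stores written in the opposite order is handled by
/// rotating the value by half its width instead of byte swapping it, e.g. two
/// i16 stores of an i32 value become a single i32 store of (rotr val, 16).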
9055SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9056 // The matching looks for "store (trunc x)" patterns that appear early but are
9057 // likely to be replaced by truncating store nodes during combining.
9058 // TODO: If there is evidence that running this later would help, this
9059 // limitation could be removed. Legality checks may need to be added
9060 // for the created store and optional bswap/rotate.
9061 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9062 return SDValue();
9063
9064 // We only handle merging simple stores of 1-4 bytes.
9065 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9066 EVT MemVT = N->getMemoryVT();
9067 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9068 !N->isSimple() || N->isIndexed())
9069 return SDValue();
9070
9071 // Collect all of the stores in the chain, up to the maximum store width (i64).
9072 SDValue Chain = N->getChain();
9073 SmallVector<StoreSDNode *, 8> Stores = {N};
9074 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9075 unsigned MaxWideNumBits = 64;
9076 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9077 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9078 // All stores must be the same size to ensure that we are writing all of the
9079 // bytes in the wide value.
9080 // This store should have exactly one use as a chain operand for another
9081 // store in the merging set. If there are other chain uses, then the
9082 // transform may not be safe because order of loads/stores outside of this
9083 // set may not be preserved.
9084 // TODO: We could allow multiple sizes by tracking each stored byte.
9085 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9086 Store->isIndexed() || !Store->hasOneUse())
9087 return SDValue();
9088 Stores.push_back(Store);
9089 Chain = Store->getChain();
9090 if (MaxStores < Stores.size())
9091 return SDValue();
9092 }
9093 // There is no reason to continue if we do not have at least a pair of stores.
9094 if (Stores.size() < 2)
9095 return SDValue();
9096
9097 // Handle simple types only.
9098 LLVMContext &Context = *DAG.getContext();
9099 unsigned NumStores = Stores.size();
9100 unsigned WideNumBits = NumStores * NarrowNumBits;
9101 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9102 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
9103 return SDValue();
9104
9105 // Check if all bytes of the source value that we are looking at are stored
9106 // to the same base address. Collect offsets from Base address into OffsetMap.
9107 SDValue SourceValue;
9108 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9109 int64_t FirstOffset = INT64_MAX;
9110 StoreSDNode *FirstStore = nullptr;
9111 std::optional<BaseIndexOffset> Base;
9112 for (auto *Store : Stores) {
9113 // All the stores store different parts of the CombinedValue. A truncate is
9114 // required to get the partial value.
9115 SDValue Trunc = Store->getValue();
9116 if (Trunc.getOpcode() != ISD::TRUNCATE)
9117 return SDValue();
9118 // Other than the first/last part, a shift operation is required to get the
9119 // offset.
9120 int64_t Offset = 0;
9121 SDValue WideVal = Trunc.getOperand(0);
9122 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9123 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9124 // The shift amount must be a constant multiple of the narrow type.
9125 // It is translated to the offset address in the wide source value "y".
9126 //
9127 // x = srl y, ShiftAmtC
9128 // i8 z = trunc x
9129 // store z, ...
9130 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9131 if (ShiftAmtC % NarrowNumBits != 0)
9132 return SDValue();
9133
9134 // Make sure we aren't reading bits that are shifted in.
9135 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9136 return SDValue();
9137
9138 Offset = ShiftAmtC / NarrowNumBits;
9139 WideVal = WideVal.getOperand(0);
9140 }
9141
9142 // Stores must share the same source value with different offsets.
9143 if (!SourceValue)
9144 SourceValue = WideVal;
9145 else if (SourceValue != WideVal) {
9146 // Truncate and extends can be stripped to see if the values are related.
9147 if (stripTruncAndExt(SourceValue) != WideVal &&
9148 stripTruncAndExt(WideVal) != SourceValue)
9149 return SDValue();
9150
9151 if (WideVal.getScalarValueSizeInBits() >
9152 SourceValue.getScalarValueSizeInBits())
9153 SourceValue = WideVal;
9154
9155 // Give up if the source value type is smaller than the store size.
9156 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9157 return SDValue();
9158 }
9159
9160 // Stores must share the same base address.
9161 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9162 int64_t ByteOffsetFromBase = 0;
9163 if (!Base)
9164 Base = Ptr;
9165 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9166 return SDValue();
9167
9168 // Remember the first store.
9169 if (ByteOffsetFromBase < FirstOffset) {
9170 FirstStore = Store;
9171 FirstOffset = ByteOffsetFromBase;
9172 }
9173 // Map the offset in the store and the offset in the combined value, and
9174 // early return if it has been set before.
9175 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9176 return SDValue();
9177 OffsetMap[Offset] = ByteOffsetFromBase;
9178 }
9179
9180 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9181 assert(FirstStore && "First store must be set");
9182
9183 // Check that a store of the wide type is both allowed and fast on the target
9184 const DataLayout &Layout = DAG.getDataLayout();
9185 unsigned Fast = 0;
9186 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9187 *FirstStore->getMemOperand(), &Fast);
9188 if (!Allowed || !Fast)
9189 return SDValue();
9190
9191 // Check if the pieces of the value are going to the expected places in memory
9192 // to merge the stores.
9193 auto checkOffsets = [&](bool MatchLittleEndian) {
9194 if (MatchLittleEndian) {
9195 for (unsigned i = 0; i != NumStores; ++i)
9196 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9197 return false;
9198 } else { // MatchBigEndian by reversing loop counter.
9199 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9200 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9201 return false;
9202 }
9203 return true;
9204 };
9205
9206 // Check if the offsets line up for the native data layout of this target.
9207 bool NeedBswap = false;
9208 bool NeedRotate = false;
9209 if (!checkOffsets(Layout.isLittleEndian())) {
9210 // Special-case: check if byte offsets line up for the opposite endian.
9211 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9212 NeedBswap = true;
9213 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9214 NeedRotate = true;
9215 else
9216 return SDValue();
9217 }
9218
9219 SDLoc DL(N);
9220 if (WideVT != SourceValue.getValueType()) {
9221 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9222 "Unexpected store value to merge");
9223 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9224 }
9225
9226 // Before legalize we can introduce illegal bswaps/rotates which will be later
9227 // converted to an explicit bswap sequence. This way we end up with a single
9228 // store and byte shuffling instead of several stores and byte shuffling.
9229 if (NeedBswap) {
9230 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9231 } else if (NeedRotate) {
9232 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9233 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9234 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9235 }
9236
9237 SDValue NewStore =
9238 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9239 FirstStore->getPointerInfo(), FirstStore->getAlign());
9240
9241 // Rely on other DAG combine rules to remove the other individual stores.
9242 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9243 return NewStore;
9244}
9245
9246/// Match a pattern where a wide type scalar value is loaded by several narrow
9247/// loads and combined by shifts and ors. Fold it into a single load or a load
9248/// and a BSWAP if the target supports it.
9249///
9250/// Assuming little endian target:
9251/// i8 *a = ...
9252/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9253/// =>
9254/// i32 val = *((i32)a)
9255///
9256/// i8 *a = ...
9257/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9258/// =>
9259/// i32 val = BSWAP(*((i32)a))
9260///
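/// The most significant bytes may also be known zero, in which case a
/// narrower zero-extending load is formed, e.g. (little endian)
/// i32 val = a[0] | (a[1] << 8)
/// =>
/// i32 val = zext(*((i16)a))
///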
9261/// TODO: This rule matches complex patterns with OR node roots and doesn't
9262/// interact well with the worklist mechanism. When a part of the pattern is
9263/// updated (e.g. one of the loads) its direct users are put into the worklist,
9264/// but the root node of the pattern which triggers the load combine is not
9265/// necessarily a direct user of the changed node. For example, once the address
9266/// of the t28 load is reassociated, load combine won't be triggered:
9267/// t25: i32 = add t4, Constant:i32<2>
9268/// t26: i64 = sign_extend t25
9269/// t27: i64 = add t2, t26
9270/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9271/// t29: i32 = zero_extend t28
9272/// t32: i32 = shl t29, Constant:i8<8>
9273/// t33: i32 = or t23, t32
9274/// As a possible fix visitLoad can check if the load can be a part of a load
9275/// combine pattern and add corresponding OR roots to the worklist.
9276SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9277 assert(N->getOpcode() == ISD::OR &&
9278 "Can only match load combining against OR nodes");
9279
9280 // Handles simple types only
9281 EVT VT = N->getValueType(0);
9282 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9283 return SDValue();
9284 unsigned ByteWidth = VT.getSizeInBits() / 8;
9285
9286 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9287 auto MemoryByteOffset = [&](SDByteProvider P) {
9288 assert(P.hasSrc() && "Must be a memory byte provider");
9289 auto *Load = cast<LoadSDNode>(P.Src.value());
9290
9291 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9292
9293 assert(LoadBitWidth % 8 == 0 &&
9294 "can only analyze providers for individual bytes not bit");
9295 unsigned LoadByteWidth = LoadBitWidth / 8;
9296 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9297 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9298 };
9299
9300 std::optional<BaseIndexOffset> Base;
9301 SDValue Chain;
9302
9303 SmallPtrSet<LoadSDNode *, 8> Loads;
9304 std::optional<SDByteProvider> FirstByteProvider;
9305 int64_t FirstOffset = INT64_MAX;
9306
9307 // Check if all the bytes of the OR we are looking at are loaded from the same
9308 // base address. Collect bytes offsets from Base address in ByteOffsets.
9309 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9310 unsigned ZeroExtendedBytes = 0;
9311 for (int i = ByteWidth - 1; i >= 0; --i) {
9312 auto P =
9313 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9314 /*StartingIndex*/ i);
9315 if (!P)
9316 return SDValue();
9317
9318 if (P->isConstantZero()) {
9319 // It's OK for the N most significant bytes to be 0, we can just
9320 // zero-extend the load.
9321 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9322 return SDValue();
9323 continue;
9324 }
9325 assert(P->hasSrc() && "provenance should either be memory or zero");
9326 auto *L = cast<LoadSDNode>(P->Src.value());
9327
9328 // All loads must share the same chain
9329 SDValue LChain = L->getChain();
9330 if (!Chain)
9331 Chain = LChain;
9332 else if (Chain != LChain)
9333 return SDValue();
9334
9335 // Loads must share the same base address
9336 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9337 int64_t ByteOffsetFromBase = 0;
9338
9339 // For vector loads, the expected load combine pattern will have an
9340 // ExtractElement for each index in the vector. While each of these
9341 // ExtractElements will be accessing the same base address as determined
9342 // by the load instruction, the actual bytes they interact with will differ
9343 // due to different ExtractElement indices. To accurately determine the
9344 // byte position of an ExtractElement, we offset the base load ptr with
9345 // the index multiplied by the byte size of each element in the vector.
9346 if (L->getMemoryVT().isVector()) {
9347 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9348 if (LoadWidthInBit % 8 != 0)
9349 return SDValue();
9350 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9351 Ptr.addToOffset(ByteOffsetFromVector);
9352 }
9353
9354 if (!Base)
9355 Base = Ptr;
9356
9357 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9358 return SDValue();
9359
9360 // Calculate the offset of the current byte from the base address
9361 ByteOffsetFromBase += MemoryByteOffset(*P);
9362 ByteOffsets[i] = ByteOffsetFromBase;
9363
9364 // Remember the first byte load
9365 if (ByteOffsetFromBase < FirstOffset) {
9366 FirstByteProvider = P;
9367 FirstOffset = ByteOffsetFromBase;
9368 }
9369
9370 Loads.insert(L);
9371 }
9372
9373 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9374 "memory, so there must be at least one load which produces the value");
9375 assert(Base && "Base address of the accessed memory location must be set");
9376 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9377
9378 bool NeedsZext = ZeroExtendedBytes > 0;
9379
9380 EVT MemVT =
9381 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9382
9383 if (!MemVT.isSimple())
9384 return SDValue();
9385
9386 // Before legalize we can introduce too wide illegal loads which will be later
9387 // split into legal sized loads. This enables us to combine i64 load by i8
9388 // patterns to a couple of i32 loads on 32 bit targets.
9389 if (LegalOperations &&
9390 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9391 MemVT))
9392 return SDValue();
9393
9394 // Check if the bytes of the OR we are looking at match with either big or
9395 // little endian value load
9396 std::optional<bool> IsBigEndian = isBigEndian(
9397 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9398 if (!IsBigEndian)
9399 return SDValue();
9400
9401 assert(FirstByteProvider && "must be set");
9402
9403 // Ensure that the first byte is loaded from zero offset of the first load.
9404 // So the combined value can be loaded from the first load address.
9405 if (MemoryByteOffset(*FirstByteProvider) != 0)
9406 return SDValue();
9407 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9408
9409 // The node we are looking at matches with the pattern, check if we can
9410 // replace it with a single (possibly zero-extended) load and bswap + shift if
9411 // needed.
9412
9413 // If the load needs byte swap check if the target supports it
9414 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9415
9416 // Before legalize we can introduce illegal bswaps which will be later
9417 // converted to an explicit bswap sequence. This way we end up with a single
9418 // load and byte shuffling instead of several loads and byte shuffling.
9419 // We do not introduce illegal bswaps when zero-extending as this tends to
9420 // introduce too many arithmetic instructions.
9421 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9422 !TLI.isOperationLegal(ISD::BSWAP, VT))
9423 return SDValue();
9424
9425 // If we need to bswap and zero extend, we have to insert a shift. Check that
9426 // it is legal.
9427 if (NeedsBswap && NeedsZext && LegalOperations &&
9428 !TLI.isOperationLegal(ISD::SHL, VT))
9429 return SDValue();
9430
9431 // Check that a load of the wide type is both allowed and fast on the target
9432 unsigned Fast = 0;
9433 bool Allowed =
9434 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9435 *FirstLoad->getMemOperand(), &Fast);
9436 if (!Allowed || !Fast)
9437 return SDValue();
9438
9439 SDValue NewLoad =
9440 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9441 Chain, FirstLoad->getBasePtr(),
9442 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9443
9444 // Transfer chain users from old loads to the new load.
9445 for (LoadSDNode *L : Loads)
9446 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9447
9448 if (!NeedsBswap)
9449 return NewLoad;
9450
9451 SDValue ShiftedLoad =
9452 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9453 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9454 VT, SDLoc(N)))
9455 : NewLoad;
9456 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9457}
9458
9459// If the target has andn, bsl, or a similar bit-select instruction,
9460// we want to unfold masked merge, with canonical pattern of:
9461// | A | |B|
9462// ((x ^ y) & m) ^ y
9463// | D |
9464// Into:
9465// (x & m) | (y & ~m)
9466// If y is a constant, m is not a 'not', and the 'andn' does not work with
9467// immediates, we unfold into a different pattern:
9468// ~(~x & m) & (m | y)
9469// If x is a constant, m is a 'not', and the 'andn' does not work with
9470// immediates, we unfold into a different pattern:
9471// (x | ~m) & ~(~m & ~y)
9472// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9473// the very least that breaks andnpd / andnps patterns, and because those
9474// patterns are simplified in IR and shouldn't be created in the DAG
9475SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9476 assert(N->getOpcode() == ISD::XOR);
9477
9478 // Don't touch 'not' (i.e. where y = -1).
9479 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9480 return SDValue();
9481
9482 EVT VT = N->getValueType(0);
9483
9484 // There are 3 commutable operators in the pattern,
9485 // so we have to deal with 8 possible variants of the basic pattern.
9486 SDValue X, Y, M;
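// matchAndXor(And, XorIdx, Other): match 'And' as a one-use AND whose
// operand XorIdx is a one-use XOR that has 'Other' as one of its operands;
// on success capture X (the other xor operand), Y (== Other) and the mask M.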
9487 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9488 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9489 return false;
9490 SDValue Xor = And.getOperand(XorIdx);
9491 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9492 return false;
9493 SDValue Xor0 = Xor.getOperand(0);
9494 SDValue Xor1 = Xor.getOperand(1);
9495 // Don't touch 'not' (i.e. where y = -1).
9496 if (isAllOnesOrAllOnesSplat(Xor1))
9497 return false;
9498 if (Other == Xor0)
9499 std::swap(Xor0, Xor1);
9500 if (Other != Xor1)
9501 return false;
9502 X = Xor0;
9503 Y = Xor1;
9504 M = And.getOperand(XorIdx ? 0 : 1);
9505 return true;
9506 };
9507
9508 SDValue N0 = N->getOperand(0);
9509 SDValue N1 = N->getOperand(1);
9510 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9511 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9512 return SDValue();
9513
9514 // Don't do anything if the mask is constant. This should not be reachable.
9515 // InstCombine should have already unfolded this pattern, and DAGCombiner
9516 // probably shouldn't produce it either.
9517 if (isa<ConstantSDNode>(M.getNode()))
9518 return SDValue();
9519
9520 // We can transform if the target has AndNot
9521 if (!TLI.hasAndNot(M))
9522 return SDValue();
9523
9524 SDLoc DL(N);
9525
9526 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9527 // a bitwise not that would already allow ANDN to be used.
9528 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9529 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9530 // If not, we need to do a bit more work to make sure andn is still used.
9531 SDValue NotX = DAG.getNOT(DL, X, VT);
9532 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9533 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9534 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9535 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9536 }
9537
9538 // If X is a constant and M is a bitwise not, check that 'andn' works with
9539 // immediates.
9540 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9541 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9542 // If not, we need to do a bit more work to make sure andn is still used.
9543 SDValue NotM = M.getOperand(0);
9544 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9545 SDValue NotY = DAG.getNOT(DL, Y, VT);
9546 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9547 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9548 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9549 }
9550
9551 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9552 SDValue NotM = DAG.getNOT(DL, M, VT);
9553 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9554
9555 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9556}
9557
9558SDValue DAGCombiner::visitXOR(SDNode *N) {
9559 SDValue N0 = N->getOperand(0);
9560 SDValue N1 = N->getOperand(1);
9561 EVT VT = N0.getValueType();
9562 SDLoc DL(N);
9563
9564 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9565 if (N0.isUndef() && N1.isUndef())
9566 return DAG.getConstant(0, DL, VT);
9567
9568 // fold (xor x, undef) -> undef
9569 if (N0.isUndef())
9570 return N0;
9571 if (N1.isUndef())
9572 return N1;
9573
9574 // fold (xor c1, c2) -> c1^c2
9575 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9576 return C;
9577
9578 // canonicalize constant to RHS
9579 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9580 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9581 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9582
9583 // fold vector ops
9584 if (VT.isVector()) {
9585 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9586 return FoldedVOp;
9587
9588 // fold (xor x, 0) -> x, vector edition
9589 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9590 return N0;
9591 }
9592
9593 // fold (xor x, 0) -> x
9594 if (isNullConstant(N1))
9595 return N0;
9596
9597 if (SDValue NewSel = foldBinOpIntoSelect(N))
9598 return NewSel;
9599
9600 // reassociate xor
9601 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9602 return RXOR;
9603
9604 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9605 if (SDValue SD =
9606 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9607 return SD;
9608
9609 // fold (a^b) -> (a|b) iff a and b share no bits.
9610 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9611 DAG.haveNoCommonBitsSet(N0, N1))
9612 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9613
9614 // look for 'add-like' folds:
9615 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9616 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9617 isMinSignedConstant(N1))
9618 if (SDValue Combined = visitADDLike(N))
9619 return Combined;
9620
9621 // fold !(x cc y) -> (x !cc y)
9622 unsigned N0Opcode = N0.getOpcode();
9623 SDValue LHS, RHS, CC;
9624 if (TLI.isConstTrueVal(N1) &&
9625 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9626 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9627 LHS.getValueType());
9628 if (!LegalOperations ||
9629 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9630 switch (N0Opcode) {
9631 default:
9632 llvm_unreachable("Unhandled SetCC Equivalent!");
9633 case ISD::SETCC:
9634 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9635 case ISD::SELECT_CC:
9636 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9637 N0.getOperand(3), NotCC);
9638 case ISD::STRICT_FSETCC:
9639 case ISD::STRICT_FSETCCS: {
9640 if (N0.hasOneUse()) {
9641 // FIXME Can we handle multiple uses? Could we token factor the chain
9642 // results from the new/old setcc?
9643 SDValue SetCC =
9644 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9645 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9646 CombineTo(N, SetCC);
9647 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9648 recursivelyDeleteUnusedNodes(N0.getNode());
9649 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9650 }
9651 break;
9652 }
9653 }
9654 }
9655 }
9656
9657 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9658 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9659 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9660 SDValue V = N0.getOperand(0);
9661 SDLoc DL0(N0);
9662 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9663 DAG.getConstant(1, DL0, V.getValueType()));
9664 AddToWorklist(V.getNode());
9665 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9666 }
9667
9668 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9669 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9670 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9671 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9672 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9673 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9674 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9675 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9676 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9677 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9678 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9679 }
9680 }
9681 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9682 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9683 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9684 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9685 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9686 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9687 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9688 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9689 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9690 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9691 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9692 }
9693 }
9694
9695 // fold (not (neg x)) -> (add X, -1)
9696 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9697 // Y is a constant or the subtract has a single use.
9698 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9699 isNullConstant(N0.getOperand(0))) {
9700 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9701 DAG.getAllOnesConstant(DL, VT));
9702 }
9703
9704 // fold (not (add X, -1)) -> (neg X)
9705 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9706 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9707 return DAG.getNegative(N0.getOperand(0), DL, VT);
9708 }
9709
9710 // fold (xor (and x, y), y) -> (and (not x), y)
9711 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9712 SDValue X = N0.getOperand(0);
9713 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9714 AddToWorklist(NotX.getNode());
9715 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9716 }
9717
9718 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9719 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9720 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9721 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9722 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9723 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9724 SDValue S0 = S.getOperand(0);
9725 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9726 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9727 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9728 return DAG.getNode(ISD::ABS, DL, VT, S0);
9729 }
9730 }
9731
9732 // fold (xor x, x) -> 0
9733 if (N0 == N1)
9734 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9735
9736 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9737 // Here is a concrete example of this equivalence:
9738 // i16 x == 14
9739 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9740 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9741 //
9742 // =>
9743 //
9744 // i16 ~1 == 0b1111111111111110
9745 // i16 rol(~1, 14) == 0b1011111111111111
9746 //
9747 // Some additional tips to help conceptualize this transform:
9748 // - Try to see the operation as placing a single zero in a value of all ones.
9749 // - There exists no value for x which would allow the result to contain zero.
9750 // - Values of x larger than the bitwidth are undefined and do not require a
9751 // consistent result.
9752 // - Pushing the zero left requires shifting one bits in from the right.
9753 // A rotate left of ~1 is a nice way of achieving the desired result.
9754 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9755 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9756 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
9757 N0.getOperand(1));
9758 }
9759
9760 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9761 if (N0Opcode == N1.getOpcode())
9762 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9763 return V;
9764
9765 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9766 return R;
9767 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9768 return R;
9769 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9770 return R;
9771
9772 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9773 if (SDValue MM = unfoldMaskedMerge(N))
9774 return MM;
9775
9776 // Simplify the expression using non-local knowledge.
9777 if (SimplifyDemandedBits(SDValue(N, 0)))
9778 return SDValue(N, 0);
9779
9780 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9781 return Combined;
9782
9783 return SDValue();
9784}
9785
9786/// If we have a shift-by-constant of a bitwise logic op that itself has a
9787/// shift-by-constant operand with identical opcode, we may be able to convert
9788/// that into 2 independent shifts followed by the logic op. This is a
9789/// throughput improvement.
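/// For example (illustrative, shift amounts are constants):
///   shl (xor (shl X, 3), Y), 2 --> xor (shl X, 5), (shl Y, 2)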
9790 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9791 // Match a one-use bitwise logic op.
9792 SDValue LogicOp = Shift->getOperand(0);
9793 if (!LogicOp.hasOneUse())
9794 return SDValue();
9795
9796 unsigned LogicOpcode = LogicOp.getOpcode();
9797 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9798 LogicOpcode != ISD::XOR)
9799 return SDValue();
9800
9801 // Find a matching one-use shift by constant.
9802 unsigned ShiftOpcode = Shift->getOpcode();
9803 SDValue C1 = Shift->getOperand(1);
9804 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9805 assert(C1Node && "Expected a shift with constant operand");
9806 const APInt &C1Val = C1Node->getAPIntValue();
9807 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9808 const APInt *&ShiftAmtVal) {
9809 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9810 return false;
9811
9812 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9813 if (!ShiftCNode)
9814 return false;
9815
9816 // Capture the shifted operand and shift amount value.
9817 ShiftOp = V.getOperand(0);
9818 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9819
9820 // Shift amount types do not have to match their operand type, so check that
9821 // the constants are the same width.
9822 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9823 return false;
9824
9825 // The fold is not valid if the sum of the shift values doesn't fit in the
9826 // given shift amount type.
9827 bool Overflow = false;
9828 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9829 if (Overflow)
9830 return false;
9831
9832 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9833 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9834 return false;
9835
9836 return true;
9837 };
9838
9839 // Logic ops are commutative, so check each operand for a match.
9840 SDValue X, Y;
9841 const APInt *C0Val;
9842 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9843 Y = LogicOp.getOperand(1);
9844 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9845 Y = LogicOp.getOperand(0);
9846 else
9847 return SDValue();
9848
9849 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9850 SDLoc DL(Shift);
9851 EVT VT = Shift->getValueType(0);
9852 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9853 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9854 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9855 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9856 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9857 LogicOp->getFlags());
9858}
9859
9860/// Handle transforms common to the three shifts, when the shift amount is a
9861/// constant.
9862/// We are looking for: (shift being one of shl/sra/srl)
9863/// shift (binop X, C0), C1
9864/// And want to transform into:
9865/// binop (shift X, C1), (shift C0, C1)
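/// For example: shl (or X, 7), 2 --> or (shl X, 2), 28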
9866SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9867 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9868
9869 // Do not turn a 'not' into a regular xor.
9870 if (isBitwiseNot(N->getOperand(0)))
9871 return SDValue();
9872
9873 // The inner binop must be one-use, since we want to replace it.
9874 SDValue LHS = N->getOperand(0);
9875 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9876 return SDValue();
9877
9878 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9879 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9880 return R;
9881
9882 // We want to pull some binops through shifts, so that we have (and (shift))
9883 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9884 // thing happens with address calculations, so it's important to canonicalize
9885 // it.
9886 switch (LHS.getOpcode()) {
9887 default:
9888 return SDValue();
9889 case ISD::OR:
9890 case ISD::XOR:
9891 case ISD::AND:
9892 break;
9893 case ISD::ADD:
9894 if (N->getOpcode() != ISD::SHL)
9895 return SDValue(); // only shl(add) not sr[al](add).
9896 break;
9897 }
9898
9899 // FIXME: disable this unless the input to the binop is a shift by a constant
9900 // or is copy/select. Enable this in other cases when we figure out it's exactly
9901 // profitable.
9902 SDValue BinOpLHSVal = LHS.getOperand(0);
9903 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9904 BinOpLHSVal.getOpcode() == ISD::SRA ||
9905 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9906 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9907 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9908 BinOpLHSVal.getOpcode() == ISD::SELECT;
9909
9910 if (!IsShiftByConstant && !IsCopyOrSelect)
9911 return SDValue();
9912
9913 if (IsCopyOrSelect && N->hasOneUse())
9914 return SDValue();
9915
9916 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9917 SDLoc DL(N);
9918 EVT VT = N->getValueType(0);
9919 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9920 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9921 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9922 N->getOperand(1));
9923 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9924 }
9925
9926 return SDValue();
9927}
9928
9929SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9930 assert(N->getOpcode() == ISD::TRUNCATE);
9931 assert(N->getOperand(0).getOpcode() == ISD::AND);
9932
9933 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9934 EVT TruncVT = N->getValueType(0);
9935 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9936 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9937 SDValue N01 = N->getOperand(0).getOperand(1);
9938 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9939 SDLoc DL(N);
9940 SDValue N00 = N->getOperand(0).getOperand(0);
9941 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9942 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9943 AddToWorklist(Trunc00.getNode());
9944 AddToWorklist(Trunc01.getNode());
9945 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9946 }
9947 }
9948
9949 return SDValue();
9950}
9951
9952SDValue DAGCombiner::visitRotate(SDNode *N) {
9953 SDLoc dl(N);
9954 SDValue N0 = N->getOperand(0);
9955 SDValue N1 = N->getOperand(1);
9956 EVT VT = N->getValueType(0);
9957 unsigned Bitsize = VT.getScalarSizeInBits();
9958
9959 // fold (rot x, 0) -> x
9960 if (isNullOrNullSplat(N1))
9961 return N0;
9962
9963 // fold (rot x, c) -> x iff (c % BitSize) == 0
9964 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9965 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9966 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9967 return N0;
9968 }
9969
9970 // fold (rot x, c) -> (rot x, c % BitSize)
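// e.g. (rotl i8 X, 11) -> (rotl i8 X, 3)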
9971 bool OutOfRange = false;
9972 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9973 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9974 return true;
9975 };
9976 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9977 EVT AmtVT = N1.getValueType();
9978 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9979 if (SDValue Amt =
9980 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9981 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9982 }
9983
9984 // rot i16 X, 8 --> bswap X
9985 auto *RotAmtC = isConstOrConstSplat(N1);
9986 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9987 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9988 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9989
9990 // Simplify the operands using demanded-bits information.
9991 if (SimplifyDemandedBits(SDValue(N, 0)))
9992 return SDValue(N, 0);
9993
9994 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9995 if (N1.getOpcode() == ISD::TRUNCATE &&
9996 N1.getOperand(0).getOpcode() == ISD::AND) {
9997 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9998 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9999 }
10000
10001 unsigned NextOp = N0.getOpcode();
10002
10003 // fold (rot* (rot* x, c2), c1)
10004 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
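// e.g. for i8: (rotl (rotr X, 2), 7) -> (rotl X, 5)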
10005 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10006 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10007 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10008 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10009 EVT ShiftVT = N1.getValueType();
10010 bool SameSide = (N->getOpcode() == NextOp);
10011 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10012 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10013 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10014 {N1, BitsizeC});
10015 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10016 {N0.getOperand(1), BitsizeC});
10017 if (Norm1 && Norm2)
10018 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10019 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10020 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10021 {CombinedShift, BitsizeC});
10022 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10023 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10024 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10025 CombinedShiftNorm);
10026 }
10027 }
10028 }
10029 return SDValue();
10030}
10031
10032SDValue DAGCombiner::visitSHL(SDNode *N) {
10033 SDValue N0 = N->getOperand(0);
10034 SDValue N1 = N->getOperand(1);
10035 if (SDValue V = DAG.simplifyShift(N0, N1))
10036 return V;
10037
10038 SDLoc DL(N);
10039 EVT VT = N0.getValueType();
10040 EVT ShiftVT = N1.getValueType();
10041 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10042
10043 // fold (shl c1, c2) -> c1<<c2
10044 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10045 return C;
10046
10047 // fold vector ops
10048 if (VT.isVector()) {
10049 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10050 return FoldedVOp;
10051
10052 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10053 // If setcc produces all-one true value then:
10054 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10055 if (N1CV && N1CV->isConstant()) {
10056 if (N0.getOpcode() == ISD::AND) {
10057 SDValue N00 = N0->getOperand(0);
10058 SDValue N01 = N0->getOperand(1);
10059 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10060
10061 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10062 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10063 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10064 if (SDValue C =
10065 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10066 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10067 }
10068 }
10069 }
10070 }
10071
10072 if (SDValue NewSel = foldBinOpIntoSelect(N))
10073 return NewSel;
10074
10075 // if (shl x, c) is known to be zero, return 0
10076 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10077 return DAG.getConstant(0, DL, VT);
10078
10079 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10080 if (N1.getOpcode() == ISD::TRUNCATE &&
10081 N1.getOperand(0).getOpcode() == ISD::AND) {
10082 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10083 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10084 }
10085
10086 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10087 if (N0.getOpcode() == ISD::SHL) {
10088 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10089 ConstantSDNode *RHS) {
10090 APInt c1 = LHS->getAPIntValue();
10091 APInt c2 = RHS->getAPIntValue();
10092 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10093 return (c1 + c2).uge(OpSizeInBits);
10094 };
10095 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10096 return DAG.getConstant(0, DL, VT);
10097
10098 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10099 ConstantSDNode *RHS) {
10100 APInt c1 = LHS->getAPIntValue();
10101 APInt c2 = RHS->getAPIntValue();
10102 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10103 return (c1 + c2).ult(OpSizeInBits);
10104 };
10105 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10106 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10107 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10108 }
10109 }
10110
10111 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10112 // For this to be valid, the second form must not preserve any of the bits
10113 // that are shifted out by the inner shift in the first form. This means
10114 // the outer shift size must be >= the number of bits added by the ext.
10115 // As a corollary, we don't care what kind of ext it is.
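// e.g. (shl (zext i32 (shl X, 2) to i64), 40) -> (shl (zext i32 X to i64), 42)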
10116 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10117 N0.getOpcode() == ISD::ANY_EXTEND ||
10118 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10119 N0.getOperand(0).getOpcode() == ISD::SHL) {
10120 SDValue N0Op0 = N0.getOperand(0);
10121 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10122 EVT InnerVT = N0Op0.getValueType();
10123 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10124
10125 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10126 ConstantSDNode *RHS) {
10127 APInt c1 = LHS->getAPIntValue();
10128 APInt c2 = RHS->getAPIntValue();
10129 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10130 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10131 (c1 + c2).uge(OpSizeInBits);
10132 };
10133 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10134 /*AllowUndefs*/ false,
10135 /*AllowTypeMismatch*/ true))
10136 return DAG.getConstant(0, DL, VT);
10137
10138 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10139 ConstantSDNode *RHS) {
10140 APInt c1 = LHS->getAPIntValue();
10141 APInt c2 = RHS->getAPIntValue();
10142 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10143 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10144 (c1 + c2).ult(OpSizeInBits);
10145 };
10146 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10147 /*AllowUndefs*/ false,
10148 /*AllowTypeMismatch*/ true)) {
10149 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10150 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10151 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10152 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10153 }
10154 }
10155
10156 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10157 // Only fold this if the inner zext has no other uses to avoid increasing
10158 // the total number of instructions.
10159 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10160 N0.getOperand(0).getOpcode() == ISD::SRL) {
10161 SDValue N0Op0 = N0.getOperand(0);
10162 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10163
10164 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10165 APInt c1 = LHS->getAPIntValue();
10166 APInt c2 = RHS->getAPIntValue();
10167 zeroExtendToMatch(c1, c2);
10168 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10169 };
10170 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10171 /*AllowUndefs*/ false,
10172 /*AllowTypeMismatch*/ true)) {
10173 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10174 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10175 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10176 AddToWorklist(NewSHL.getNode());
10177 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10178 }
10179 }
10180
10181 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10182 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10183 ConstantSDNode *RHS) {
10184 const APInt &LHSC = LHS->getAPIntValue();
10185 const APInt &RHSC = RHS->getAPIntValue();
10186 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10187 LHSC.getZExtValue() <= RHSC.getZExtValue();
10188 };
10189
10190 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10191 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10192 if (N0->getFlags().hasExact()) {
10193 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10194 /*AllowUndefs*/ false,
10195 /*AllowTypeMismatch*/ true)) {
10196 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10197 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10198 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10199 }
10200 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10201 /*AllowUndefs*/ false,
10202 /*AllowTypeMismatch*/ true)) {
10203 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10204 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10205 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10206 }
10207 }
10208
10209 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10210 // (and (srl x, (sub c1, c2)), MASK)
10211 // Only fold this if the inner shift has no other uses -- if it does,
10212 // folding this will increase the total number of instructions.
10213 if (N0.getOpcode() == ISD::SRL &&
10214 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10215 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10216 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10217 /*AllowUndefs*/ false,
10218 /*AllowTypeMismatch*/ true)) {
10219 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10220 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10221 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10222 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10223 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10224 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10225 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10226 }
10227 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10228 /*AllowUndefs*/ false,
10229 /*AllowTypeMismatch*/ true)) {
10230 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10231 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10232 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10233 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10234 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10235 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10236 }
10237 }
10238 }
10239
10240 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10241 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10242 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10243 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10244 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10245 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10246 }
10247
10248 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10249 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10250 // Variant of version done on multiply, except mul by a power of 2 is turned
10251 // into a shift.
10252 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10253 TLI.isDesirableToCommuteWithShift(N, Level)) {
10254 SDValue N01 = N0.getOperand(1);
10255 if (SDValue Shl1 =
10256 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10257 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10258 AddToWorklist(Shl0.getNode());
10259 SDNodeFlags Flags;
10260 // Preserve the disjoint flag for Or.
10261 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10262 Flags |= SDNodeFlags::Disjoint;
10263 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10264 }
10265 }
10266
10267 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10268 // TODO: Add zext/add_nuw variant with suitable test coverage
10269 // TODO: Should we limit this with isLegalAddImmediate?
10270 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10271 N0.getOperand(0).getOpcode() == ISD::ADD &&
10272 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10273 TLI.isDesirableToCommuteWithShift(N, Level)) {
10274 SDValue Add = N0.getOperand(0);
10275 SDLoc DL(N0);
10276 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10277 {Add.getOperand(1)})) {
10278 if (SDValue ShlC =
10279 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10280 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10281 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10282 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10283 }
10284 }
10285 }
10286
10287 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10288 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10289 SDValue N01 = N0.getOperand(1);
10290 if (SDValue Shl =
10291 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10292 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10293 }
10294
10295 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10296 if (N1C && !N1C->isOpaque())
10297 if (SDValue NewSHL = visitShiftByConstant(N))
10298 return NewSHL;
10299
10300 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10301 // target.
10302 if (((N1.getOpcode() == ISD::CTTZ &&
10303 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10304 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10305 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10306 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10307 SDValue Y = N1.getOperand(0);
10308 SDLoc DL(N);
10309 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10310 SDValue And =
10311 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10312 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10313 }
10314
10315 if (SimplifyDemandedBits(SDValue(N, 0)))
10316 return SDValue(N, 0);
10317
10318 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10319 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10320 const APInt &C0 = N0.getConstantOperandAPInt(0);
10321 const APInt &C1 = N1C->getAPIntValue();
10322 return DAG.getVScale(DL, VT, C0 << C1);
10323 }
10324
10325 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10326 APInt ShlVal;
10327 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10328 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10329 const APInt &C0 = N0.getConstantOperandAPInt(0);
10330 if (ShlVal.ult(C0.getBitWidth())) {
10331 APInt NewStep = C0 << ShlVal;
10332 return DAG.getStepVector(DL, VT, NewStep);
10333 }
10334 }
10335
10336 return SDValue();
10337}
10338
10339// Transform a right shift of a multiply into a multiply-high.
10340// Examples:
10341 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10342 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
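// One multiply operand may instead be a constant that fits in the narrow type:
// (srl (mul (zext i16:$a to i32), 42), 16) -> (mulhu $a, 42)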
10343 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10344 const TargetLowering &TLI) {
10345 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10346 "SRL or SRA node is required here!");
10347
10348 // Check the shift amount. Proceed with the transformation if the shift
10349 // amount is constant.
10350 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10351 if (!ShiftAmtSrc)
10352 return SDValue();
10353
10354 // The operation feeding into the shift must be a multiply.
10355 SDValue ShiftOperand = N->getOperand(0);
10356 if (ShiftOperand.getOpcode() != ISD::MUL)
10357 return SDValue();
10358
10359 // Both operands must be equivalent extend nodes.
10360 SDValue LeftOp = ShiftOperand.getOperand(0);
10361 SDValue RightOp = ShiftOperand.getOperand(1);
10362
10363 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10364 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10365
10366 if (!IsSignExt && !IsZeroExt)
10367 return SDValue();
10368
10369 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10370 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10371
10372 // return true if U may use the lower bits of its operands
10373 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10374 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10375 return true;
10376 }
10377 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10378 if (!UShiftAmtSrc) {
10379 return true;
10380 }
10381 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10382 return UShiftAmt < NarrowVTSize;
10383 };
10384
10385 // If the lower part of the MUL is also used and MUL_LOHI is supported
10386 // do not introduce the MULH in favor of MUL_LOHI
10387 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10388 if (!ShiftOperand.hasOneUse() &&
10389 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10390 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10391 return SDValue();
10392 }
10393
10394 SDValue MulhRightOp;
10395 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10396 unsigned ActiveBits = IsSignExt
10397 ? Constant->getAPIntValue().getSignificantBits()
10398 : Constant->getAPIntValue().getActiveBits();
10399 if (ActiveBits > NarrowVTSize)
10400 return SDValue();
10401 MulhRightOp = DAG.getConstant(
10402 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10403 NarrowVT);
10404 } else {
10405 if (LeftOp.getOpcode() != RightOp.getOpcode())
10406 return SDValue();
10407 // Check that the two extend nodes are the same type.
10408 if (NarrowVT != RightOp.getOperand(0).getValueType())
10409 return SDValue();
10410 MulhRightOp = RightOp.getOperand(0);
10411 }
10412
10413 EVT WideVT = LeftOp.getValueType();
10414 // Proceed with the transformation if the wide types match.
10415 assert((WideVT == RightOp.getValueType()) &&
10416 "Cannot have a multiply node with two different operand types.");
10417
10418 // Proceed with the transformation if the wide type is twice as large
10419 // as the narrow type.
10420 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10421 return SDValue();
10422
10423 // Check the shift amount with the narrow type size.
10424 // Proceed with the transformation if the shift amount is the width
10425 // of the narrow type.
10426 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10427 if (ShiftAmt != NarrowVTSize)
10428 return SDValue();
10429
10430 // If the operation feeding into the MUL is a sign extend (sext),
10431 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10432 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10433
10434 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10435 // or if it is a vector type then we could transform to an acceptable type and
10436 // rely on legalization to split/combine the result.
10437 if (NarrowVT.isVector()) {
10438 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10439 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10440 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10441 return SDValue();
10442 } else {
10443 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10444 return SDValue();
10445 }
10446
10447 SDValue Result =
10448 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10449 bool IsSigned = N->getOpcode() == ISD::SRA;
10450 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10451}
10452
10453// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10454 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10455 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10456 unsigned Opcode = N->getOpcode();
10457 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10458 return SDValue();
10459
10460 SDValue N0 = N->getOperand(0);
10461 EVT VT = N->getValueType(0);
10462 SDLoc DL(N);
10463 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10464 SDValue OldLHS = N0.getOperand(0);
10465 SDValue OldRHS = N0.getOperand(1);
10466
10467 // If both operands are bswap/bitreverse, ignore the multiuse
10468 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10469 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10470 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10471 OldRHS.getOperand(0));
10472 }
10473
10474 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10475 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10476 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10477 NewBitReorder);
10478 }
10479
10480 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10481 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10482 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10483 OldRHS.getOperand(0));
10484 }
10485 }
10486 return SDValue();
10487}
10488
10489SDValue DAGCombiner::visitSRA(SDNode *N) {
10490 SDValue N0 = N->getOperand(0);
10491 SDValue N1 = N->getOperand(1);
10492 if (SDValue V = DAG.simplifyShift(N0, N1))
10493 return V;
10494
10495 SDLoc DL(N);
10496 EVT VT = N0.getValueType();
10497 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10498
10499 // fold (sra c1, c2) -> c1 >>s c2
10500 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10501 return C;
10502
10503 // Arithmetic shifting an all-sign-bit value is a no-op.
10504 // fold (sra 0, x) -> 0
10505 // fold (sra -1, x) -> -1
10506 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10507 return N0;
10508
10509 // fold vector ops
10510 if (VT.isVector())
10511 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10512 return FoldedVOp;
10513
10514 if (SDValue NewSel = foldBinOpIntoSelect(N))
10515 return NewSel;
10516
10517 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10518
10519 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10520 // clamp (add c1, c2) to max shift.
10521 if (N0.getOpcode() == ISD::SRA) {
10522 EVT ShiftVT = N1.getValueType();
10523 EVT ShiftSVT = ShiftVT.getScalarType();
10524 SmallVector<SDValue, 16> ShiftValues;
10525
10526 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10527 APInt c1 = LHS->getAPIntValue();
10528 APInt c2 = RHS->getAPIntValue();
10529 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10530 APInt Sum = c1 + c2;
10531 unsigned ShiftSum =
10532 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10533 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10534 return true;
10535 };
10536 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10537 SDValue ShiftValue;
10538 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10539 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10540 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10541 assert(ShiftValues.size() == 1 &&
10542 "Expected matchBinaryPredicate to return one element for "
10543 "SPLAT_VECTORs");
10544 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10545 } else
10546 ShiftValue = ShiftValues[0];
10547 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10548 }
10549 }
10550
10551 // fold (sra (shl X, m), (sub result_size, n))
10552 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10553 // result_size - n != m.
10554 // If truncate is free for the target sext(shl) is likely to result in better
10555 // code.
10556 if (N0.getOpcode() == ISD::SHL && N1C) {
10557 // Get the two constants of the shifts, CN0 = m, CN = n.
10558 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10559 if (N01C) {
10560 LLVMContext &Ctx = *DAG.getContext();
10561 // Determine what the truncate's result bitsize and type would be.
10562 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10563
10564 if (VT.isVector())
10565 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10566
10567 // Determine the residual right-shift amount.
10568 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10569
10570 // If the shift is not a no-op (in which case this should be just a sign
10571 // extend already), the truncated to type is legal, sign_extend is legal
10572 // on that type, and the truncate to that type is both legal and free,
10573 // perform the transform.
10574 if ((ShiftAmt > 0) &&
10575 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10576 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10577 TLI.isTruncateFree(VT, TruncVT)) {
10578 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10579 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10580 N0.getOperand(0), Amt);
10581 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10582 Shift);
10583 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10584 N->getValueType(0), Trunc);
10585 }
10586 }
10587 }
10588
10589 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10590 // sra (add (shl X, N1C), AddC), N1C -->
10591 // sext (add (trunc X to (width - N1C)), AddC')
10592 // sra (sub AddC, (shl X, N1C)), N1C -->
10593 // sext (sub AddC1',(trunc X to (width - N1C)))
10594 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10595 N0.hasOneUse()) {
10596 bool IsAdd = N0.getOpcode() == ISD::ADD;
10597 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10598 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10599 Shl.hasOneUse()) {
10600 // TODO: AddC does not need to be a splat.
10601 if (ConstantSDNode *AddC =
10602 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10603 // Determine what the truncate's type would be and ask the target if
10604 // that is a free operation.
10605 LLVMContext &Ctx = *DAG.getContext();
10606 unsigned ShiftAmt = N1C->getZExtValue();
10607 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10608 if (VT.isVector())
10609 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10610
10611 // TODO: The simple type check probably belongs in the default hook
10612 // implementation and/or target-specific overrides (because
10613 // non-simple types likely require masking when legalized), but
10614 // that restriction may conflict with other transforms.
10615 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10616 TLI.isTruncateFree(VT, TruncVT)) {
10617 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10618 SDValue ShiftC =
10619 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10620 TruncVT.getScalarSizeInBits()),
10621 DL, TruncVT);
10622 SDValue Add;
10623 if (IsAdd)
10624 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10625 else
10626 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10627 return DAG.getSExtOrTrunc(Add, DL, VT);
10628 }
10629 }
10630 }
10631 }
10632
10633 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10634 if (N1.getOpcode() == ISD::TRUNCATE &&
10635 N1.getOperand(0).getOpcode() == ISD::AND) {
10636 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10637 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10638 }
10639
10640 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10641 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10642 // if c1 is equal to the number of bits the trunc removes
10643 // TODO - support non-uniform vector shift amounts.
10644 if (N0.getOpcode() == ISD::TRUNCATE &&
10645 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10646 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10647 N0.getOperand(0).hasOneUse() &&
10648 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10649 SDValue N0Op0 = N0.getOperand(0);
10650 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10651 EVT LargeVT = N0Op0.getValueType();
10652 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10653 if (LargeShift->getAPIntValue() == TruncBits) {
10654 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10655 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10656 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10657 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10658 SDValue SRA =
10659 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10660 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10661 }
10662 }
10663 }
10664
10665 // Simplify, based on bits shifted out of the LHS.
10666 if (SimplifyDemandedBits(SDValue(N, 0)))
10667 return SDValue(N, 0);
10668
10669 // If the sign bit is known to be zero, switch this to a SRL.
10670 if (DAG.SignBitIsZero(N0))
10671 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10672
10673 if (N1C && !N1C->isOpaque())
10674 if (SDValue NewSRA = visitShiftByConstant(N))
10675 return NewSRA;
10676
10677 // Try to transform this shift into a multiply-high if
10678 // it matches the appropriate pattern detected in combineShiftToMULH.
10679 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10680 return MULH;
10681
10682 // Attempt to convert a sra of a load into a narrower sign-extending load.
10683 if (SDValue NarrowLoad = reduceLoadWidth(N))
10684 return NarrowLoad;
10685
10686 if (SDValue AVG = foldShiftToAvg(N))
10687 return AVG;
10688
10689 return SDValue();
10690}
10691
10692SDValue DAGCombiner::visitSRL(SDNode *N) {
10693 SDValue N0 = N->getOperand(0);
10694 SDValue N1 = N->getOperand(1);
10695 if (SDValue V = DAG.simplifyShift(N0, N1))
10696 return V;
10697
10698 SDLoc DL(N);
10699 EVT VT = N0.getValueType();
10700 EVT ShiftVT = N1.getValueType();
10701 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10702
10703 // fold (srl c1, c2) -> c1 >>u c2
10704 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10705 return C;
10706
10707 // fold vector ops
10708 if (VT.isVector())
10709 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10710 return FoldedVOp;
10711
10712 if (SDValue NewSel = foldBinOpIntoSelect(N))
10713 return NewSel;
10714
10715 // if (srl x, c) is known to be zero, return 0
10716 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10717 if (N1C &&
10718 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10719 return DAG.getConstant(0, DL, VT);
10720
10721 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10722 if (N0.getOpcode() == ISD::SRL) {
10723 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10724 ConstantSDNode *RHS) {
10725 APInt c1 = LHS->getAPIntValue();
10726 APInt c2 = RHS->getAPIntValue();
10727 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10728 return (c1 + c2).uge(OpSizeInBits);
10729 };
10730 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10731 return DAG.getConstant(0, DL, VT);
10732
10733 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10734 ConstantSDNode *RHS) {
10735 APInt c1 = LHS->getAPIntValue();
10736 APInt c2 = RHS->getAPIntValue();
10737 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10738 return (c1 + c2).ult(OpSizeInBits);
10739 };
10740 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10741 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10742 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10743 }
10744 }
10745
10746 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10747 N0.getOperand(0).getOpcode() == ISD::SRL) {
10748 SDValue InnerShift = N0.getOperand(0);
10749 // TODO - support non-uniform vector shift amounts.
10750 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10751 uint64_t c1 = N001C->getZExtValue();
10752 uint64_t c2 = N1C->getZExtValue();
10753 EVT InnerShiftVT = InnerShift.getValueType();
10754 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10755 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10756 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10757 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10758 if (c1 + OpSizeInBits == InnerShiftSize) {
10759 if (c1 + c2 >= InnerShiftSize)
10760 return DAG.getConstant(0, DL, VT);
10761 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10762 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10763 InnerShift.getOperand(0), NewShiftAmt);
10764 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10765 }
10766 // In the more general case, we can clear the high bits after the shift:
10767 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10768 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10769 c1 + c2 < InnerShiftSize) {
10770 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10771 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10772 InnerShift.getOperand(0), NewShiftAmt);
10773 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10774 OpSizeInBits - c2),
10775 DL, InnerShiftVT);
10776 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10777 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10778 }
10779 }
10780 }
10781
10782 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
10783 // (and (srl x, (sub c2, c1), MASK)
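// For example, for i32:
// (srl (shl x, 5), 2) --> (and (shl x, 3), 0x3FFFFFF8)
// (srl (shl x, 2), 5) --> (and (srl x, 3), 0x07FFFFFF)
// (srl (shl x, 3), 3) --> (and x, 0x1FFFFFFF)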
10784 if (N0.getOpcode() == ISD::SHL &&
10785 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10786 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10787 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10788 ConstantSDNode *RHS) {
10789 const APInt &LHSC = LHS->getAPIntValue();
10790 const APInt &RHSC = RHS->getAPIntValue();
10791 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10792 LHSC.getZExtValue() <= RHSC.getZExtValue();
10793 };
10794 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10795 /*AllowUndefs*/ false,
10796 /*AllowTypeMismatch*/ true)) {
10797 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10798 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10799 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10800 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10801 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10802 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10803 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10804 }
10805 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10806 /*AllowUndefs*/ false,
10807 /*AllowTypeMismatch*/ true)) {
10808 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10809 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10810 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10811 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10812 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10813 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10814 }
10815 }
10816
10817 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10818 // TODO - support non-uniform vector shift amounts.
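// For example, with x: i8 any-extended to i32:
// (srl (any_extend x), 3) --> (and (any_extend (srl x, 3)), 0x1FFFFFFF);
// the mask zeroes the top ShiftAmt bits to match the zeros the wide srl would
// have shifted in.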
10819 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10820 // Shifting in all undef bits?
10821 EVT SmallVT = N0.getOperand(0).getValueType();
10822 unsigned BitSize = SmallVT.getScalarSizeInBits();
10823 if (N1C->getAPIntValue().uge(BitSize))
10824 return DAG.getUNDEF(VT);
10825
10826 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10827 uint64_t ShiftAmt = N1C->getZExtValue();
10828 SDLoc DL0(N0);
10829 SDValue SmallShift =
10830 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10831 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10832 AddToWorklist(SmallShift.getNode());
10833 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10834 return DAG.getNode(ISD::AND, DL, VT,
10835 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10836 DAG.getConstant(Mask, DL, VT));
10837 }
10838 }
10839
10840 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10841 // bit, which is unmodified by sra.
10842 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10843 if (N0.getOpcode() == ISD::SRA)
10844 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10845 }
10846
10847 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10848 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
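// For example, if an i32 x is known to be either 0 or 8 (only bit 3 can be
// set), then (srl (ctlz x), 5) is 1 when x == 0 and 0 when x == 8, so it
// becomes (xor (srl x, 3), 1).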
10849 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10850 isPowerOf2_32(OpSizeInBits) &&
10851 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10852 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10853
10854 // If any of the input bits are KnownOne, then the input couldn't be all
10855 // zeros, thus the result of the srl will always be zero.
10856 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10857
10858 // If all of the bits input to the ctlz node are known to be zero, then
10859 // the result of the ctlz is "32" and the result of the shift is one.
10860 APInt UnknownBits = ~Known.Zero;
10861 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10862
10863 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10864 if (UnknownBits.isPowerOf2()) {
10865 // Okay, we know that only the single bit specified by UnknownBits
10866 // could be set on input to the CTLZ node. If this bit is set, the SRL
10867 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10868 // to an SRL/XOR pair, which is likely to simplify more.
10869 unsigned ShAmt = UnknownBits.countr_zero();
10870 SDValue Op = N0.getOperand(0);
10871
10872 if (ShAmt) {
10873 SDLoc DL(N0);
10874 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10875 DAG.getShiftAmountConstant(ShAmt, VT, DL));
10876 AddToWorklist(Op.getNode());
10877 }
10878 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10879 }
10880 }
10881
10882 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10883 if (N1.getOpcode() == ISD::TRUNCATE &&
10884 N1.getOperand(0).getOpcode() == ISD::AND) {
10885 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10886 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10887 }
10888
10889 // fold operands of srl based on knowledge that the low bits are not
10890 // demanded.
10891 if (SimplifyDemandedBits(SDValue(N, 0)))
10892 return SDValue(N, 0);
10893
10894 if (N1C && !N1C->isOpaque())
10895 if (SDValue NewSRL = visitShiftByConstant(N))
10896 return NewSRL;
10897
10898 // Attempt to convert a srl of a load into a narrower zero-extending load.
10899 if (SDValue NarrowLoad = reduceLoadWidth(N))
10900 return NarrowLoad;
10901
10902 // Here is a common situation. We want to optimize:
10903 //
10904 // %a = ...
10905 // %b = and i32 %a, 2
10906 // %c = srl i32 %b, 1
10907 // brcond i32 %c ...
10908 //
10909 // into
10910 //
10911 // %a = ...
10912 // %b = and %a, 2
10913 // %c = setcc eq %b, 0
10914 // brcond %c ...
10915 //
10916 // However, after the source operand of the SRL is optimized into an AND, the SRL
10917 // itself may not be optimized further. Look for it and add the BRCOND into
10918 // the worklist.
10919 //
10920 // This also tends to happen for binary operations when SimplifyDemandedBits
10921 // is involved.
10922 //
10923 // FIXME: This is unnecessary if we process the DAG in topological order,
10924 // which we plan to do. This workaround can be removed once the DAG is
10925 // processed in topological order.
10926 if (N->hasOneUse()) {
10927 SDNode *User = *N->user_begin();
10928
10929 // Look past the truncate.
10930 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
10931 User = *User->user_begin();
10932
10933 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
10934 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
10935 AddToWorklist(User);
10936 }
10937
10938 // Try to transform this shift into a multiply-high if
10939 // it matches the appropriate pattern detected in combineShiftToMULH.
10940 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10941 return MULH;
10942
10943 if (SDValue AVG = foldShiftToAvg(N))
10944 return AVG;
10945
10946 return SDValue();
10947}
10948
10949SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10950 EVT VT = N->getValueType(0);
10951 SDValue N0 = N->getOperand(0);
10952 SDValue N1 = N->getOperand(1);
10953 SDValue N2 = N->getOperand(2);
10954 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10955 unsigned BitWidth = VT.getScalarSizeInBits();
10956 SDLoc DL(N);
10957
10958 // fold (fshl N0, N1, 0) -> N0
10959 // fold (fshr N0, N1, 0) -> N1
10960 if (isPowerOf2_32(BitWidth))
10961 if (DAG.MaskedValueIsZero(
10962 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10963 return IsFSHL ? N0 : N1;
10964
10965 auto IsUndefOrZero = [](SDValue V) {
10966 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10967 };
10968
10969 // TODO - support non-uniform vector shift amounts.
10970 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10971 EVT ShAmtTy = N2.getValueType();
10972
10973 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10974 if (Cst->getAPIntValue().uge(BitWidth)) {
10975 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10976 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10977 DAG.getConstant(RotAmt, DL, ShAmtTy));
10978 }
10979
10980 unsigned ShAmt = Cst->getZExtValue();
10981 if (ShAmt == 0)
10982 return IsFSHL ? N0 : N1;
10983
10984 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10985 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10986 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10987 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
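// For example, for i32, fshl(0, y, 8) concatenates a zero high word with y and
// keeps the top 32 bits of the shifted pair, i.e. y >> (32 - 8), which matches
// the lshr(N1, BW-C) form above.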
10988 if (IsUndefOrZero(N0))
10989 return DAG.getNode(
10990 ISD::SRL, DL, VT, N1,
10991 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10992 if (IsUndefOrZero(N1))
10993 return DAG.getNode(
10994 ISD::SHL, DL, VT, N0,
10995 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10996
10997 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10998 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10999 // TODO - bigendian support once we have test coverage.
11000 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11001 // TODO - permit LHS EXTLOAD if extensions are shifted out.
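// For example, on a little-endian target with two consecutive i32 loads
// ld0 (at p) and ld1 (at p + 4), fshr(ld1, ld0, 8) reads bytes p[1..4], so it
// folds to a single i32 load at p + 1 (PtrOff = ShAmt / 8 = 1), subject to the
// alignment/fast-access checks below.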
11002 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11003 !DAG.getDataLayout().isBigEndian()) {
11004 auto *LHS = dyn_cast<LoadSDNode>(N0);
11005 auto *RHS = dyn_cast<LoadSDNode>(N1);
11006 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11007 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11008 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11009 ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11010 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11011 SDLoc DL(RHS);
11012 uint64_t PtrOff =
11013 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11014 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11015 unsigned Fast = 0;
11016 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11017 RHS->getAddressSpace(), NewAlign,
11018 RHS->getMemOperand()->getFlags(), &Fast) &&
11019 Fast) {
11020 SDValue NewPtr = DAG.getMemBasePlusOffset(
11021 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11022 AddToWorklist(NewPtr.getNode());
11023 SDValue Load = DAG.getLoad(
11024 VT, DL, RHS->getChain(), NewPtr,
11025 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11027 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11028 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11029 return Load;
11030 }
11031 }
11032 }
11033 }
11034 }
11035
11036 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11037 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11038 // iff we know the shift amount is in range.
11039 // TODO: when is it worth doing SUB(BW, N2) as well?
11040 if (isPowerOf2_32(BitWidth)) {
11041 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11042 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11043 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11044 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11045 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11046 }
11047
11048 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11049 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11050 // TODO: Investigate flipping this rotate if only one is legal.
11051 // If funnel shift is legal as well we might be better off avoiding
11052 // non-constant (BW - N2).
11053 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11054 if (N0 == N1 && hasOperation(RotOpc, VT))
11055 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11056
11057 // Simplify, based on bits shifted out of N0/N1.
11058 if (SimplifyDemandedBits(SDValue(N, 0)))
11059 return SDValue(N, 0);
11060
11061 return SDValue();
11062}
11063
11064SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11065 SDValue N0 = N->getOperand(0);
11066 SDValue N1 = N->getOperand(1);
11067 if (SDValue V = DAG.simplifyShift(N0, N1))
11068 return V;
11069
11070 SDLoc DL(N);
11071 EVT VT = N0.getValueType();
11072
11073 // fold (*shlsat c1, c2) -> c1<<c2
11074 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11075 return C;
11076
11077 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11078
11079 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11080 // fold (sshlsat x, c) -> (shl x, c)
11081 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11082 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11083 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11084
11085 // fold (ushlsat x, c) -> (shl x, c)
11086 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11087 N1C->getAPIntValue().ule(
11088 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11089 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11090 }
11091
11092 return SDValue();
11093}
11094
11095// Given a ABS node, detect the following patterns:
11096// (ABS (SUB (EXTEND a), (EXTEND b))).
11097// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11098// Generates UABD/SABD instruction.
11099SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11100 EVT SrcVT = N->getValueType(0);
11101
11102 if (N->getOpcode() == ISD::TRUNCATE)
11103 N = N->getOperand(0).getNode();
11104
11105 if (N->getOpcode() != ISD::ABS)
11106 return SDValue();
11107
11108 EVT VT = N->getValueType(0);
11109 SDValue AbsOp1 = N->getOperand(0);
11110 SDValue Op0, Op1;
11111
11112 if (AbsOp1.getOpcode() != ISD::SUB)
11113 return SDValue();
11114
11115 Op0 = AbsOp1.getOperand(0);
11116 Op1 = AbsOp1.getOperand(1);
11117
11118 unsigned Opc0 = Op0.getOpcode();
11119
11120 // Check if the operands of the sub are (zero|sign)-extended.
11121 // TODO: Should we use ValueTracking instead?
11122 if (Opc0 != Op1.getOpcode() ||
11123 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11124 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11125 // fold (abs (sub nsw x, y)) -> abds(x, y)
11126 // Don't fold this for unsupported types as we lose the NSW handling.
11127 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11128 TLI.preferABDSToABSWithNSW(VT)) {
11129 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11130 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11131 }
11132 return SDValue();
11133 }
11134
11135 EVT VT0, VT1;
11136 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11137 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11138 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11139 } else {
11140 VT0 = Op0.getOperand(0).getValueType();
11141 VT1 = Op1.getOperand(0).getValueType();
11142 }
11143 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11144
11145 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11146 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11147 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11148 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11149 (VT1 == MaxVT || Op1->hasOneUse()) &&
11150 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11151 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11152 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11153 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11154 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11155 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11156 }
11157
11158 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11159 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11160 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11161 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11162 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11163 }
11164
11165 return SDValue();
11166}
11167
11168SDValue DAGCombiner::visitABS(SDNode *N) {
11169 SDValue N0 = N->getOperand(0);
11170 EVT VT = N->getValueType(0);
11171 SDLoc DL(N);
11172
11173 // fold (abs c1) -> c2
11174 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11175 return C;
11176 // fold (abs (abs x)) -> (abs x)
11177 if (N0.getOpcode() == ISD::ABS)
11178 return N0;
11179 // fold (abs x) -> x iff not-negative
11180 if (DAG.SignBitIsZero(N0))
11181 return N0;
11182
11183 if (SDValue ABD = foldABSToABD(N, DL))
11184 return ABD;
11185
11186 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11187 // iff zero_extend/truncate are free.
11188 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11189 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11190 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11191 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11192 hasOperation(ISD::ABS, ExtVT)) {
11193 return DAG.getNode(
11194 ISD::ZERO_EXTEND, DL, VT,
11195 DAG.getNode(ISD::ABS, DL, ExtVT,
11196 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11197 }
11198 }
11199
11200 return SDValue();
11201}
11202
11203SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11204 SDValue N0 = N->getOperand(0);
11205 EVT VT = N->getValueType(0);
11206 SDLoc DL(N);
11207
11208 // fold (bswap c1) -> c2
11209 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11210 return C;
11211 // fold (bswap (bswap x)) -> x
11212 if (N0.getOpcode() == ISD::BSWAP)
11213 return N0.getOperand(0);
11214
11215 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11216 // isn't supported, it will be expanded to bswap followed by a manual reversal
11217 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11218 // the two bswaps if the bitreverse gets expanded.
11219 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11220 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11221 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11222 }
11223
11224 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11225 // iff c >= bw/2 (i.e. the lower half of the result is known zero)
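// For example, for i64 and c = 48:
// bswap(shl x, 48) --> zext(bswap(trunc i32 (shl x, 16))); only the low 16 bits
// of x survive either way, and both forms leave them byte-reversed in bits [15:0].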
11226 unsigned BW = VT.getScalarSizeInBits();
11227 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11228 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11229 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11230 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11231 ShAmt->getZExtValue() >= (BW / 2) &&
11232 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11233 TLI.isTruncateFree(VT, HalfVT) &&
11234 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11235 SDValue Res = N0.getOperand(0);
11236 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11237 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11238 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11239 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11240 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11241 return DAG.getZExtOrTrunc(Res, DL, VT);
11242 }
11243 }
11244
11245 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11246 // inverse-shift-of-bswap:
11247 // bswap (X u<< C) --> (bswap X) u>> C
11248 // bswap (X u>> C) --> (bswap X) u<< C
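// For example, for i32 with bytes b3:b2:b1:b0 (b0 low), bswap(x << 8) and
// bswap(x) >> 8 both produce 0:b0:b1:b2, so shifting after the bswap (in the
// opposite direction) is equivalent for byte-multiple shift amounts.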
11249 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11250 N0.hasOneUse()) {
11251 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11252 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11253 ShAmt->getZExtValue() % 8 == 0) {
11254 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11255 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11256 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11257 }
11258 }
11259
11260 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11261 return V;
11262
11263 return SDValue();
11264}
11265
11266SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11267 SDValue N0 = N->getOperand(0);
11268 EVT VT = N->getValueType(0);
11269 SDLoc DL(N);
11270
11271 // fold (bitreverse c1) -> c2
11272 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11273 return C;
11274
11275 // fold (bitreverse (bitreverse x)) -> x
11276 if (N0.getOpcode() == ISD::BITREVERSE)
11277 return N0.getOperand(0);
11278
11279 SDValue X, Y;
11280
11281 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
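// Reversing, shifting right by y, then reversing again moves bit i of x to
// bit i + y and fills the low y bits with the zeros the lshr shifted in at the
// top, which is exactly shl x, y (and symmetrically for the shl/srl pair below).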
11282 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11284 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11285
11286 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11287 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11289 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11290
11291 return SDValue();
11292}
11293
11294SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11295 SDValue N0 = N->getOperand(0);
11296 EVT VT = N->getValueType(0);
11297 SDLoc DL(N);
11298
11299 // fold (ctlz c1) -> c2
11300 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11301 return C;
11302
11303 // If the value is known never to be zero, switch to the undef version.
11304 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11305 if (DAG.isKnownNeverZero(N0))
11306 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11307
11308 return SDValue();
11309}
11310
11311SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11312 SDValue N0 = N->getOperand(0);
11313 EVT VT = N->getValueType(0);
11314 SDLoc DL(N);
11315
11316 // fold (ctlz_zero_undef c1) -> c2
11317 if (SDValue C =
11318 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11319 return C;
11320 return SDValue();
11321}
11322
11323SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11324 SDValue N0 = N->getOperand(0);
11325 EVT VT = N->getValueType(0);
11326 SDLoc DL(N);
11327
11328 // fold (cttz c1) -> c2
11329 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11330 return C;
11331
11332 // If the value is known never to be zero, switch to the undef version.
11333 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11334 if (DAG.isKnownNeverZero(N0))
11335 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11336
11337 return SDValue();
11338}
11339
11340SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11341 SDValue N0 = N->getOperand(0);
11342 EVT VT = N->getValueType(0);
11343 SDLoc DL(N);
11344
11345 // fold (cttz_zero_undef c1) -> c2
11346 if (SDValue C =
11347 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11348 return C;
11349 return SDValue();
11350}
11351
11352SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11353 SDValue N0 = N->getOperand(0);
11354 EVT VT = N->getValueType(0);
11355 unsigned NumBits = VT.getScalarSizeInBits();
11356 SDLoc DL(N);
11357
11358 // fold (ctpop c1) -> c2
11359 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11360 return C;
11361
11362 // If the source is being shifted, but doesn't affect any active bits,
11363 // then we can call CTPOP on the shift source directly.
11364 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11365 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11366 const APInt &Amt = AmtC->getAPIntValue();
11367 if (Amt.ult(NumBits)) {
11368 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11369 if ((N0.getOpcode() == ISD::SRL &&
11370 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11371 (N0.getOpcode() == ISD::SHL &&
11372 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11373 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11374 }
11375 }
11376 }
11377 }
11378
11379 // If the upper bits are known to be zero, then see if it's profitable to
11380 // only count the lower bits.
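// For example, for an i64 x whose upper 32 bits are known zero,
// ctpop(x) --> zext(ctpop(trunc i32 x)), trading a wide popcount for a narrow
// one plus a zero-extension the target reports as free.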
11381 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11382 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11383 if (hasOperation(ISD::CTPOP, HalfVT) &&
11384 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11385 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11386 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11387 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11388 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11389 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11390 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11391 }
11392 }
11393 }
11394
11395 return SDValue();
11396}
11397
11398 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11399 SDValue RHS, const SDNodeFlags Flags,
11400 const TargetLowering &TLI) {
11401 EVT VT = LHS.getValueType();
11402 if (!VT.isFloatingPoint())
11403 return false;
11404
11405 const TargetOptions &Options = DAG.getTarget().Options;
11406
11407 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11409 (Flags.hasNoNaNs() ||
11410 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11411}
11412
11413 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11414 SDValue RHS, SDValue True, SDValue False,
11415 ISD::CondCode CC,
11416 const TargetLowering &TLI,
11417 SelectionDAG &DAG) {
11418 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11419 switch (CC) {
11420 case ISD::SETOLT:
11421 case ISD::SETOLE:
11422 case ISD::SETLT:
11423 case ISD::SETLE:
11424 case ISD::SETULT:
11425 case ISD::SETULE: {
11426 // Since it's already known never to be NaN to get here, either fminnum or
11427 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11428 // expanded in terms of it.
11429 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11430 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11431 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11432
11433 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11434 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11435 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11436 return SDValue();
11437 }
11438 case ISD::SETOGT:
11439 case ISD::SETOGE:
11440 case ISD::SETGT:
11441 case ISD::SETGE:
11442 case ISD::SETUGT:
11443 case ISD::SETUGE: {
11444 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11445 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11446 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11447
11448 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11449 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11450 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11451 return SDValue();
11452 }
11453 default:
11454 return SDValue();
11455 }
11456}
11457
11458SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
11459 const unsigned Opcode = N->getOpcode();
11460
11461 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
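// For example, for unsigned i8 with x = 7 and y = 9 the add nuw does not wrap,
// and (srl (add nuw 7, 9), 1) = 8 = floor((7 + 9) / 2), i.e. avgflooru(7, 9).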
11462 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11463 return SDValue();
11464
11465 unsigned FloorISD = 0;
11466 auto VT = N->getValueType(0);
11467 bool IsUnsigned = false;
11468
11469 // Decide whether signed or unsigned.
11470 switch (Opcode) {
11471 case ISD::SRA:
11472 if (!hasOperation(ISD::AVGFLOORS, VT))
11473 return SDValue();
11474 FloorISD = ISD::AVGFLOORS;
11475 break;
11476 case ISD::SRL:
11477 IsUnsigned = true;
11478 if (!hasOperation(ISD::AVGFLOORU, VT))
11479 return SDValue();
11480 FloorISD = ISD::AVGFLOORU;
11481 break;
11482 default:
11483 return SDValue();
11484 }
11485
11486 // Captured values.
11487 SDValue A, B, Add;
11488
11489 // Match floor average as it is common to both floor/ceil avgs.
11490 if (!sd_match(N, m_BinOp(Opcode,
11491 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11492 m_One())))
11493 return SDValue();
11494
11495 // Can't optimize adds that may wrap.
11496 if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11497 return SDValue();
11498
11499 if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11500 return SDValue();
11501
11502 return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11503}
11504
11505/// Generate Min/Max node
11506SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11507 SDValue RHS, SDValue True,
11508 SDValue False, ISD::CondCode CC) {
11509 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11510 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11511
11512 // If we can't directly match this, try to see if we can pull an fneg out of
11513 // the select.
11514 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11515 True, DAG, LegalOperations, ForCodeSize);
11516 if (!NegTrue)
11517 return SDValue();
11518
11519 HandleSDNode NegTrueHandle(NegTrue);
11520
11521 // Try to unfold an fneg from the select if we are comparing the negated
11522 // constant.
11523 //
11524 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11525 //
11526 // TODO: Handle fabs
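// For example, select (setcc olt x, 2.0), (fneg x), -2.0 negates to
// fneg(fminnum(x, 2.0)): when x < 2 the result is -x, otherwise -2, which is
// -(min(x, 2)) under the no-NaN/no-signed-zero preconditions the caller has
// already checked.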
11527 if (LHS == NegTrue) {
11528 // If we can't directly match this, try to see if we can pull an fneg out of
11529 // the select.
11530 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11531 RHS, DAG, LegalOperations, ForCodeSize);
11532 if (NegRHS) {
11533 HandleSDNode NegRHSHandle(NegRHS);
11534 if (NegRHS == False) {
11535 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11536 False, CC, TLI, DAG);
11537 if (Combined)
11538 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11539 }
11540 }
11541 }
11542
11543 return SDValue();
11544}
11545
11546/// If a (v)select has a condition value that is a sign-bit test, try to smear
11547/// the condition operand sign-bit across the value width and use it as a mask.
11548 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11549 SelectionDAG &DAG) {
11550 SDValue Cond = N->getOperand(0);
11551 SDValue C1 = N->getOperand(1);
11552 SDValue C2 = N->getOperand(2);
11554 return SDValue();
11555
11556 EVT VT = N->getValueType(0);
11557 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11558 VT != Cond.getOperand(0).getValueType())
11559 return SDValue();
11560
11561 // The inverted-condition + commuted-select variants of these patterns are
11562 // canonicalized to these forms in IR.
11563 SDValue X = Cond.getOperand(0);
11564 SDValue CondC = Cond.getOperand(1);
11565 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11566 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11567 isAllOnesOrAllOnesSplat(C2)) {
11568 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
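// Sanity check with concrete values: if X = -5 the arithmetic shift gives all
// ones, so the OR yields -1 (the "false" value); if X = 7 the shift gives 0 and
// the OR yields C1, matching the original select.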
11569 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11570 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11571 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11572 }
11573 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11574 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11575 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11576 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11577 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11578 }
11579 return SDValue();
11580}
11581
11582 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11583 const TargetLowering &TLI) {
11584 if (!TLI.convertSelectOfConstantsToMath(VT))
11585 return false;
11586
11587 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11588 return true;
11589 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11590 return true;
11591
11592 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11593 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11594 return true;
11595 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11596 return true;
11597
11598 return false;
11599}
11600
11601SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11602 SDValue Cond = N->getOperand(0);
11603 SDValue N1 = N->getOperand(1);
11604 SDValue N2 = N->getOperand(2);
11605 EVT VT = N->getValueType(0);
11606 EVT CondVT = Cond.getValueType();
11607 SDLoc DL(N);
11608
11609 if (!VT.isInteger())
11610 return SDValue();
11611
11612 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11613 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11614 if (!C1 || !C2)
11615 return SDValue();
11616
11617 if (CondVT != MVT::i1 || LegalOperations) {
11618 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11619 // We can't do this reliably if integer based booleans have different contents
11620 // to floating point based booleans. This is because we can't tell whether we
11621 // have an integer-based boolean or a floating-point-based boolean unless we
11622 // can find the SETCC that produced it and inspect its operands. This is
11623 // fairly easy if C is the SETCC node, but it can potentially be
11624 // undiscoverable (or not reasonably discoverable). For example, it could be
11625 // in another basic block or it could require searching a complicated
11626 // expression.
11627 if (CondVT.isInteger() &&
11628 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11629 TargetLowering::ZeroOrOneBooleanContent &&
11630 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11631 TargetLowering::ZeroOrOneBooleanContent &&
11632 C1->isZero() && C2->isOne()) {
11633 SDValue NotCond =
11634 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11635 if (VT.bitsEq(CondVT))
11636 return NotCond;
11637 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11638 }
11639
11640 return SDValue();
11641 }
11642
11643 // Only do this before legalization to avoid conflicting with target-specific
11644 // transforms in the other direction (create a select from a zext/sext). There
11645 // is also a target-independent combine here in DAGCombiner in the other
11646 // direction for (select Cond, -1, 0) when the condition is not i1.
11647 assert(CondVT == MVT::i1 && !LegalOperations);
11648
11649 // select Cond, 1, 0 --> zext (Cond)
11650 if (C1->isOne() && C2->isZero())
11651 return DAG.getZExtOrTrunc(Cond, DL, VT);
11652
11653 // select Cond, -1, 0 --> sext (Cond)
11654 if (C1->isAllOnes() && C2->isZero())
11655 return DAG.getSExtOrTrunc(Cond, DL, VT);
11656
11657 // select Cond, 0, 1 --> zext (!Cond)
11658 if (C1->isZero() && C2->isOne()) {
11659 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11660 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11661 return NotCond;
11662 }
11663
11664 // select Cond, 0, -1 --> sext (!Cond)
11665 if (C1->isZero() && C2->isAllOnes()) {
11666 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11667 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11668 return NotCond;
11669 }
11670
11671 // Use a target hook because some targets may prefer to transform in the
11672 // other direction.
11673 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11674 return SDValue();
11675
11676 // For any constants that differ by 1, we can transform the select into
11677 // an extend and add.
11678 const APInt &C1Val = C1->getAPIntValue();
11679 const APInt &C2Val = C2->getAPIntValue();
11680
11681 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11682 if (C1Val - 1 == C2Val) {
11683 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11684 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11685 }
11686
11687 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11688 if (C1Val + 1 == C2Val) {
11689 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11690 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11691 }
11692
11693 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11694 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11695 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11696 SDValue ShAmtC =
11697 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11698 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11699 }
11700
11701 // select Cond, -1, C --> or (sext Cond), C
11702 if (C1->isAllOnes()) {
11703 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11704 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11705 }
11706
11707 // select Cond, C, -1 --> or (sext (not Cond)), C
11708 if (C2->isAllOnes()) {
11709 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11710 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11711 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11712 }
11713
11714 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11715 return V;
11716
11717 return SDValue();
11718}
11719
11720template <class MatchContextClass>
11721 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11722 SelectionDAG &DAG) {
11723 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11724 N->getOpcode() == ISD::VP_SELECT) &&
11725 "Expected a (v)(vp.)select");
11726 SDValue Cond = N->getOperand(0);
11727 SDValue T = N->getOperand(1), F = N->getOperand(2);
11728 EVT VT = N->getValueType(0);
11729 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11730 MatchContextClass matcher(DAG, TLI, N);
11731
11732 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11733 return SDValue();
11734
11735 // select Cond, Cond, F --> or Cond, freeze(F)
11736 // select Cond, 1, F --> or Cond, freeze(F)
11737 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11738 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
11739
11740 // select Cond, T, Cond --> and Cond, freeze(T)
11741 // select Cond, T, 0 --> and Cond, freeze(T)
11742 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11743 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11744
11745 // select Cond, T, 1 --> or (not Cond), freeze(T)
11746 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11747 SDValue NotCond =
11748 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11749 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11750 }
11751
11752 // select Cond, 0, F --> and (not Cond), freeze(F)
11753 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11754 SDValue NotCond =
11755 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11756 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11757 }
11758
11759 return SDValue();
11760}
11761
11762 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11763 SDValue N0 = N->getOperand(0);
11764 SDValue N1 = N->getOperand(1);
11765 SDValue N2 = N->getOperand(2);
11766 EVT VT = N->getValueType(0);
11767 unsigned EltSizeInBits = VT.getScalarSizeInBits();
11768
11769 SDValue Cond0, Cond1;
11770 ISD::CondCode CC;
11771 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11772 m_CondCode(CC)))) ||
11773 VT != Cond0.getValueType())
11774 return SDValue();
11775
11776 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11777 // compare is inverted from that pattern ("Cond0 s> -1").
11778 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11779 ; // This is the pattern we are looking for.
11780 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11781 std::swap(N1, N2);
11782 else
11783 return SDValue();
11784
11785 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11786 if (isNullOrNullSplat(N2)) {
11787 SDLoc DL(N);
11788 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11789 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11790 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11791 }
11792
11793 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11794 if (isAllOnesOrAllOnesSplat(N1)) {
11795 SDLoc DL(N);
11796 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11797 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11798 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11799 }
11800
11801 // If we have to invert the sign bit mask, only do that transform if the
11802 // target has a bitwise 'and not' instruction (the invert is free).
11803 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11804 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11805 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11806 SDLoc DL(N);
11807 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
11808 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11809 SDValue Not = DAG.getNOT(DL, Sra, VT);
11810 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11811 }
11812
11813 // TODO: There's another pattern in this family, but it may require
11814 // implementing hasOrNot() to check for profitability:
11815 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11816
11817 return SDValue();
11818}
11819
11820// Match SELECTs with absolute difference patterns.
11821// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11822// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11823// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11824// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
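// For example, unsigned i32 with a = 3 and b = 10: the compare (a ugt b) is
// false, so the select picks (sub b, a) = 7, which is the absolute difference
// abdu(3, 10) = 7; the swapped-operand forms below instead fold to the negated
// difference.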
11825SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
11826 SDValue False, ISD::CondCode CC,
11827 const SDLoc &DL) {
11828 bool IsSigned = isSignedIntSetCC(CC);
11829 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
11830 EVT VT = LHS.getValueType();
11831
11832 if (LegalOperations && !hasOperation(ABDOpc, VT))
11833 return SDValue();
11834
11835 switch (CC) {
11836 case ISD::SETGT:
11837 case ISD::SETGE:
11838 case ISD::SETUGT:
11839 case ISD::SETUGE:
11840 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11841 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
11842 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11843 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11844 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11845 hasOperation(ABDOpc, VT))
11846 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11847 break;
11848 case ISD::SETLT:
11849 case ISD::SETLE:
11850 case ISD::SETULT:
11851 case ISD::SETULE:
11852 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11853 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
11854 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11855 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11856 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11857 hasOperation(ABDOpc, VT))
11858 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
11859 break;
11860 default:
11861 break;
11862 }
11863
11864 return SDValue();
11865}
11866
11867SDValue DAGCombiner::visitSELECT(SDNode *N) {
11868 SDValue N0 = N->getOperand(0);
11869 SDValue N1 = N->getOperand(1);
11870 SDValue N2 = N->getOperand(2);
11871 EVT VT = N->getValueType(0);
11872 EVT VT0 = N0.getValueType();
11873 SDLoc DL(N);
11874 SDNodeFlags Flags = N->getFlags();
11875
11876 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11877 return V;
11878
11879 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11880 return V;
11881
11882 // select (not Cond), N1, N2 -> select Cond, N2, N1
11883 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11884 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11885 SelectOp->setFlags(Flags);
11886 return SelectOp;
11887 }
11888
11889 if (SDValue V = foldSelectOfConstants(N))
11890 return V;
11891
11892 // If we can fold this based on the true/false value, do so.
11893 if (SimplifySelectOps(N, N1, N2))
11894 return SDValue(N, 0); // Don't revisit N.
11895
11896 if (VT0 == MVT::i1) {
11897 // The code in this block deals with the following 2 equivalences:
11898 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11899 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11900 // The target can specify its preferred form with the
11901 // shouldNormalizeToSelectSequence() callback. However we always transform
11902 // to the right anyway if we find the inner select exists in the DAG anyway
11903 // and we always transform to the left side if we know that we can further
11904 // optimize the combination of the conditions.
11905 bool normalizeToSequence =
11906 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11907 // select (and Cond0, Cond1), X, Y
11908 // -> select Cond0, (select Cond1, X, Y), Y
11909 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11910 SDValue Cond0 = N0->getOperand(0);
11911 SDValue Cond1 = N0->getOperand(1);
11912 SDValue InnerSelect =
11913 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11914 if (normalizeToSequence || !InnerSelect.use_empty())
11915 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11916 InnerSelect, N2, Flags);
11917 // Cleanup on failure.
11918 if (InnerSelect.use_empty())
11919 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11920 }
11921 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11922 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11923 SDValue Cond0 = N0->getOperand(0);
11924 SDValue Cond1 = N0->getOperand(1);
11925 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11926 Cond1, N1, N2, Flags);
11927 if (normalizeToSequence || !InnerSelect.use_empty())
11928 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11929 InnerSelect, Flags);
11930 // Cleanup on failure.
11931 if (InnerSelect.use_empty())
11932 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11933 }
11934
11935 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11936 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11937 SDValue N1_0 = N1->getOperand(0);
11938 SDValue N1_1 = N1->getOperand(1);
11939 SDValue N1_2 = N1->getOperand(2);
11940 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11941 // Create the actual and node if we can generate good code for it.
11942 if (!normalizeToSequence) {
11943 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11944 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11945 N2, Flags);
11946 }
11947 // Otherwise see if we can optimize the "and" to a better pattern.
11948 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11949 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11950 N2, Flags);
11951 }
11952 }
11953 }
11954 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11955 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11956 SDValue N2_0 = N2->getOperand(0);
11957 SDValue N2_1 = N2->getOperand(1);
11958 SDValue N2_2 = N2->getOperand(2);
11959 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11960 // Create the actual or node if we can generate good code for it.
11961 if (!normalizeToSequence) {
11962 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11963 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11964 N2_2, Flags);
11965 }
11966 // Otherwise see if we can optimize to a better pattern.
11967 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11968 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11969 N2_2, Flags);
11970 }
11971 }
11972
11973 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11974 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11975 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
11976 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11977 N2.getOperand(1) == N1.getOperand(0) &&
11978 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11979 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
11980
11981 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11982 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11983 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
11984 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11985 N2.getOperand(1) == N1.getOperand(0) &&
11986 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11987 return DAG.getNegative(
11988 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
11989 DL, VT);
11990 }
11991
11992 // Fold selects based on a setcc into other things, such as min/max/abs.
11993 if (N0.getOpcode() == ISD::SETCC) {
11994 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11995 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11996
11997 // select (fcmp lt x, y), x, y -> fminnum x, y
11998 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11999 //
12000 // This is OK if we don't care what happens if either operand is a NaN.
12001 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12002 if (SDValue FMinMax =
12003 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12004 return FMinMax;
12005
12006 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12007 // This is conservatively limited to pre-legal-operations to give targets
12008 // a chance to reverse the transform if they want to do that. Also, it is
12009 // unlikely that the pattern would be formed late, so it's probably not
12010 // worth going through the other checks.
12011 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12012 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12013 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12014 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12015 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12016 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12017 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12018 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12019 //
12020 // The IR equivalent of this transform would have this form:
12021 // %a = add %x, C
12022 // %c = icmp ugt %x, ~C
12023 // %r = select %c, -1, %a
12024 // =>
12025 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12026 // %u0 = extractvalue %u, 0
12027 // %u1 = extractvalue %u, 1
12028 // %r = select %u1, -1, %u0
12029 SDVTList VTs = DAG.getVTList(VT, VT0);
12030 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12031 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12032 }
12033 }
12034
12035 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12036 (!LegalOperations &&
12038 // Any flags available in a select/setcc fold will be on the setcc as they
12039 // migrated from fcmp
12040 Flags = N0->getFlags();
12041 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
12042 N2, N0.getOperand(2));
12043 SelectNode->setFlags(Flags);
12044 return SelectNode;
12045 }
12046
12047 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12048 return ABD;
12049
12050 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12051 return NewSel;
12052 }
12053
12054 if (!VT.isVector())
12055 if (SDValue BinOp = foldSelectOfBinops(N))
12056 return BinOp;
12057
12058 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12059 return R;
12060
12061 return SDValue();
12062}
12063
12064// This function assumes all the vselect's arguments are CONCAT_VECTOR
12065// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
12066 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12067 SDLoc DL(N);
12068 SDValue Cond = N->getOperand(0);
12069 SDValue LHS = N->getOperand(1);
12070 SDValue RHS = N->getOperand(2);
12071 EVT VT = N->getValueType(0);
12072 int NumElems = VT.getVectorNumElements();
12073 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12074 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12075 Cond.getOpcode() == ISD::BUILD_VECTOR);
12076
12077 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12078 // binary ones here.
12079 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12080 return SDValue();
12081
12082 // We're sure we have an even number of elements due to the
12083 // concat_vectors we have as arguments to vselect.
12084 // Skip BV elements until we find one that's not an UNDEF
12085 // After we find an UNDEF element, keep looping until we get to half the
12086 // length of the BV and see if all the non-undef nodes are the same.
12087 ConstantSDNode *BottomHalf = nullptr;
12088 for (int i = 0; i < NumElems / 2; ++i) {
12089 if (Cond->getOperand(i)->isUndef())
12090 continue;
12091
12092 if (BottomHalf == nullptr)
12093 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12094 else if (Cond->getOperand(i).getNode() != BottomHalf)
12095 return SDValue();
12096 }
12097
12098 // Do the same for the second half of the BuildVector
12099 ConstantSDNode *TopHalf = nullptr;
12100 for (int i = NumElems / 2; i < NumElems; ++i) {
12101 if (Cond->getOperand(i)->isUndef())
12102 continue;
12103
12104 if (TopHalf == nullptr)
12105 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12106 else if (Cond->getOperand(i).getNode() != TopHalf)
12107 return SDValue();
12108 }
12109
12110 assert(TopHalf && BottomHalf &&
12111 "One half of the selector was all UNDEFs and the other was all the "
12112 "same value. This should have been addressed before this function.");
12113 return DAG.getNode(
12114 ISD::CONCAT_VECTORS, DL, VT,
12115 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12116 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12117}
12118
12119bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12120 SelectionDAG &DAG, const SDLoc &DL) {
12121
12122 // Only perform the transformation when existing operands can be reused.
12123 if (IndexIsScaled)
12124 return false;
12125
12126 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12127 return false;
12128
12129 EVT VT = BasePtr.getValueType();
12130
12131 if (SDValue SplatVal = DAG.getSplatValue(Index);
12132 SplatVal && !isNullConstant(SplatVal) &&
12133 SplatVal.getValueType() == VT) {
12134 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12135 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12136 return true;
12137 }
12138
12139 if (Index.getOpcode() != ISD::ADD)
12140 return false;
12141
12142 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12143 SplatVal && SplatVal.getValueType() == VT) {
12144 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12145 Index = Index.getOperand(1);
12146 return true;
12147 }
12148 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12149 SplatVal && SplatVal.getValueType() == VT) {
12150 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12151 Index = Index.getOperand(0);
12152 return true;
12153 }
12154 return false;
12155}
12156
12157// Fold sext/zext of index into index type.
12158bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12159 SelectionDAG &DAG) {
12160 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12161
12162 // It's always safe to look through zero extends.
12163 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12164 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12165 IndexType = ISD::UNSIGNED_SCALED;
12166 Index = Index.getOperand(0);
12167 return true;
12168 }
12169 if (ISD::isIndexTypeSigned(IndexType)) {
12170 IndexType = ISD::UNSIGNED_SCALED;
12171 return true;
12172 }
12173 }
12174
12175 // It's only safe to look through sign extends when Index is signed.
12176 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12177 ISD::isIndexTypeSigned(IndexType) &&
12178 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12179 Index = Index.getOperand(0);
12180 return true;
12181 }
12182
12183 return false;
12184}
12185
12186SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12187 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12188 SDValue Mask = MSC->getMask();
12189 SDValue Chain = MSC->getChain();
12190 SDValue Index = MSC->getIndex();
12191 SDValue Scale = MSC->getScale();
12192 SDValue StoreVal = MSC->getValue();
12193 SDValue BasePtr = MSC->getBasePtr();
12194 SDValue VL = MSC->getVectorLength();
12195 ISD::MemIndexType IndexType = MSC->getIndexType();
12196 SDLoc DL(N);
12197
12198 // Zap scatters with a zero mask.
12199 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12200 return Chain;
12201
12202 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12203 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12204 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12205 DL, Ops, MSC->getMemOperand(), IndexType);
12206 }
12207
12208 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12209 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12210 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12211 DL, Ops, MSC->getMemOperand(), IndexType);
12212 }
12213
12214 return SDValue();
12215}
12216
12217SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12218 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12219 SDValue Mask = MSC->getMask();
12220 SDValue Chain = MSC->getChain();
12221 SDValue Index = MSC->getIndex();
12222 SDValue Scale = MSC->getScale();
12223 SDValue StoreVal = MSC->getValue();
12224 SDValue BasePtr = MSC->getBasePtr();
12225 ISD::MemIndexType IndexType = MSC->getIndexType();
12226 SDLoc DL(N);
12227
12228 // Zap scatters with a zero mask.
12229 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12230 return Chain;
12231
12232 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12233 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12234 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12235 DL, Ops, MSC->getMemOperand(), IndexType,
12236 MSC->isTruncatingStore());
12237 }
12238
12239 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12240 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12241 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12242 DL, Ops, MSC->getMemOperand(), IndexType,
12243 MSC->isTruncatingStore());
12244 }
12245
12246 return SDValue();
12247}
12248
12249SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12250 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12251 SDValue Mask = MST->getMask();
12252 SDValue Chain = MST->getChain();
12253 SDValue Value = MST->getValue();
12254 SDValue Ptr = MST->getBasePtr();
12255 SDLoc DL(N);
12256
12257 // Zap masked stores with a zero mask.
12258 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12259 return Chain;
12260
12261 // Remove a masked store if base pointers and masks are equal.
12262 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12263 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12264 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12265 !MST->getBasePtr().isUndef() &&
12266 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12267 MST1->getMemoryVT().getStoreSize()) ||
12268 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12269 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12270 MST->getMemoryVT().getStoreSize())) {
12271 CombineTo(MST1, MST1->getChain());
12272 if (N->getOpcode() != ISD::DELETED_NODE)
12273 AddToWorklist(N);
12274 return SDValue(N, 0);
12275 }
12276 }
12277
12278 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12279 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12280 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12281 !MST->isCompressingStore() && !MST->isTruncatingStore())
12282 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12283 MST->getBasePtr(), MST->getPointerInfo(),
12284 MST->getOriginalAlign(),
12285 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12286
12287 // Try transforming N to an indexed store.
12288 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12289 return SDValue(N, 0);
12290
12291 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12292 Value.getValueType().isInteger() &&
12293 (!isa<ConstantSDNode>(Value) ||
12294 !cast<ConstantSDNode>(Value)->isOpaque())) {
12295 APInt TruncDemandedBits =
12296 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12297 MST->getMemoryVT().getScalarSizeInBits());
12298
12299 // See if we can simplify the operation with
12300 // SimplifyDemandedBits, which only works if the value has a single use.
12301 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12302 // Re-visit the store if anything changed and the store hasn't been merged
12303 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12304 // node back to the worklist if necessary, but we also need to re-visit
12305 // the Store node itself.
12306 if (N->getOpcode() != ISD::DELETED_NODE)
12307 AddToWorklist(N);
12308 return SDValue(N, 0);
12309 }
12310 }
12311
12312 // If this is a TRUNC followed by a masked store, fold this into a masked
12313 // truncating store. We can do this even if this is already a masked
12314 // truncstore.
12315 // TODO: Try to combine this into a masked compress store if possible.
12316 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12317 MST->isUnindexed() && !MST->isCompressingStore() &&
12318 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12319 MST->getMemoryVT(), LegalOperations)) {
12320 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12321 Value.getOperand(0).getValueType());
12322 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12323 MST->getOffset(), Mask, MST->getMemoryVT(),
12324 MST->getMemOperand(), MST->getAddressingMode(),
12325 /*IsTruncating=*/true);
12326 }
12327
12328 return SDValue();
12329}
12330
12331SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12332 auto *SST = cast<VPStridedStoreSDNode>(N);
12333 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12334 // Combine strided stores with unit-stride to a regular VP store.
12335 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12336 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12337 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12338 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12339 SST->getVectorLength(), SST->getMemoryVT(),
12340 SST->getMemOperand(), SST->getAddressingMode(),
12341 SST->isTruncatingStore(), SST->isCompressingStore());
12342 }
12343 return SDValue();
12344}
12345
12346SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12347 SDLoc DL(N);
12348 SDValue Vec = N->getOperand(0);
12349 SDValue Mask = N->getOperand(1);
12350 SDValue Passthru = N->getOperand(2);
12351 EVT VecVT = Vec.getValueType();
12352
12353 bool HasPassthru = !Passthru.isUndef();
12354
12355 APInt SplatVal;
12356 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12357 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12358
12359 if (Vec.isUndef() || Mask.isUndef())
12360 return Passthru;
12361
12362 // No need for potentially expensive compress if the mask is constant.
12363 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12364 SmallVector<SDValue> Ops;
12365 EVT ScalarVT = VecVT.getVectorElementType();
12366 unsigned NumSelected = 0;
12367 unsigned NumElmts = VecVT.getVectorNumElements();
12368 for (unsigned I = 0; I < NumElmts; ++I) {
12369 SDValue MaskI = Mask.getOperand(I);
12370 // We treat undef mask entries as "false".
12371 if (MaskI.isUndef())
12372 continue;
12373
12374 if (TLI.isConstTrueVal(MaskI)) {
12375 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12376 DAG.getVectorIdxConstant(I, DL));
12377 Ops.push_back(VecI);
12378 NumSelected++;
12379 }
12380 }
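// Lanes after the selected elements are taken from the passthru operand, or are undef
// when no passthru was given.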
12381 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12382 SDValue Val =
12383 HasPassthru
12384 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12385 DAG.getVectorIdxConstant(Rest, DL))
12386 : DAG.getUNDEF(ScalarVT);
12387 Ops.push_back(Val);
12388 }
12389 return DAG.getBuildVector(VecVT, DL, Ops);
12390 }
12391
12392 return SDValue();
12393}
12394
12395SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12396 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12397 SDValue Mask = MGT->getMask();
12398 SDValue Chain = MGT->getChain();
12399 SDValue Index = MGT->getIndex();
12400 SDValue Scale = MGT->getScale();
12401 SDValue BasePtr = MGT->getBasePtr();
12402 SDValue VL = MGT->getVectorLength();
12403 ISD::MemIndexType IndexType = MGT->getIndexType();
12404 SDLoc DL(N);
12405
12406 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12407 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12408 return DAG.getGatherVP(
12409 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12410 Ops, MGT->getMemOperand(), IndexType);
12411 }
12412
12413 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12414 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12415 return DAG.getGatherVP(
12416 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12417 Ops, MGT->getMemOperand(), IndexType);
12418 }
12419
12420 return SDValue();
12421}
12422
12423SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12424 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12425 SDValue Mask = MGT->getMask();
12426 SDValue Chain = MGT->getChain();
12427 SDValue Index = MGT->getIndex();
12428 SDValue Scale = MGT->getScale();
12429 SDValue PassThru = MGT->getPassThru();
12430 SDValue BasePtr = MGT->getBasePtr();
12431 ISD::MemIndexType IndexType = MGT->getIndexType();
12432 SDLoc DL(N);
12433
12434 // Zap gathers with a zero mask.
12435 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12436 return CombineTo(N, PassThru, MGT->getChain());
12437
12438 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12439 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12440 return DAG.getMaskedGather(
12441 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12442 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12443 }
12444
12445 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12446 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12447 return DAG.getMaskedGather(
12448 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12449 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12450 }
12451
12452 return SDValue();
12453}
12454
12455SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12456 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12457 SDValue Mask = MLD->getMask();
12458 SDLoc DL(N);
12459
12460 // Zap masked loads with a zero mask.
12461 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12462 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12463
12464 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12465 // FIXME: Can we do this for indexed, expanding, or extending loads?
12466 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12467 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12468 SDValue NewLd = DAG.getLoad(
12469 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12470 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12471 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12472 return CombineTo(N, NewLd, NewLd.getValue(1));
12473 }
12474
12475 // Try transforming N to an indexed load.
12476 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12477 return SDValue(N, 0);
12478
12479 return SDValue();
12480}
12481
12482SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12483 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12484 SDValue Chain = HG->getChain();
12485 SDValue Inc = HG->getInc();
12486 SDValue Mask = HG->getMask();
12487 SDValue BasePtr = HG->getBasePtr();
12488 SDValue Index = HG->getIndex();
12489 SDLoc DL(HG);
12490
12491 EVT MemVT = HG->getMemoryVT();
12492 MachineMemOperand *MMO = HG->getMemOperand();
12493 ISD::MemIndexType IndexType = HG->getIndexType();
12494
12495 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12496 return Chain;
12497
12498 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12499 HG->getScale(), HG->getIntID()};
12500 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12501 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12502 MMO, IndexType);
12503
12504 EVT DataVT = Index.getValueType();
12505 if (refineIndexType(Index, IndexType, DataVT, DAG))
12506 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12507 MMO, IndexType);
12508 return SDValue();
12509}
12510
12511SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12512 auto *SLD = cast<VPStridedLoadSDNode>(N);
12513 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12514 // Combine strided loads with unit-stride to a regular VP load.
12515 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12516 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12517 SDValue NewLd = DAG.getLoadVP(
12518 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12519 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12520 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12521 SLD->getMemOperand(), SLD->isExpandingLoad());
12522 return CombineTo(N, NewLd, NewLd.getValue(1));
12523 }
12524 return SDValue();
12525}
12526
12527/// A vector select of 2 constant vectors can be simplified to math/logic to
12528/// avoid a variable select instruction and possibly avoid constant loads.
12529SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12530 SDValue Cond = N->getOperand(0);
12531 SDValue N1 = N->getOperand(1);
12532 SDValue N2 = N->getOperand(2);
12533 EVT VT = N->getValueType(0);
12534 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12535 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12536 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12537 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12538 return SDValue();
12539
12540 // Check if we can use the condition value to increment/decrement a single
12541 // constant value. This simplifies a select to an add and removes a constant
12542 // load/materialization from the general case.
12543 bool AllAddOne = true;
12544 bool AllSubOne = true;
12545 unsigned Elts = VT.getVectorNumElements();
12546 for (unsigned i = 0; i != Elts; ++i) {
12547 SDValue N1Elt = N1.getOperand(i);
12548 SDValue N2Elt = N2.getOperand(i);
12549 if (N1Elt.isUndef() || N2Elt.isUndef())
12550 continue;
12551 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12552 AllAddOne = false;
12553 AllSubOne = false;
12554 break;
12555 }
12556
12557 const APInt &C1 = N1Elt->getAsAPIntVal();
12558 const APInt &C2 = N2Elt->getAsAPIntVal();
12559 if (C1 != C2 + 1)
12560 AllAddOne = false;
12561 if (C1 != C2 - 1)
12562 AllSubOne = false;
12563 }
12564
12565 // Further simplifications for the extra-special cases where the constants are
12566 // all 0 or all -1 should be implemented as folds of these patterns.
12567 SDLoc DL(N);
12568 if (AllAddOne || AllSubOne) {
12569 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12570 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12571 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12572 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12573 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12574 }
12575
12576 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12577 APInt Pow2C;
12578 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12579 isNullOrNullSplat(N2)) {
12580 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12581 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12582 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12583 }
12584
12585 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12586 return V;
12587
12588 // The general case for select-of-constants:
12589 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12590 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12591 // leave that to a machine-specific pass.
12592 return SDValue();
12593}
12594
12595SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12596 SDValue N0 = N->getOperand(0);
12597 SDValue N1 = N->getOperand(1);
12598 SDValue N2 = N->getOperand(2);
12599 SDLoc DL(N);
12600
12601 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12602 return V;
12603
12604 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12605 return V;
12606
12607 return SDValue();
12608}
12609
12610SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12611 SDValue N0 = N->getOperand(0);
12612 SDValue N1 = N->getOperand(1);
12613 SDValue N2 = N->getOperand(2);
12614 EVT VT = N->getValueType(0);
12615 SDLoc DL(N);
12616
12617 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12618 return V;
12619
12620 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12621 return V;
12622
12623 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12624 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12625 return DAG.getSelect(DL, VT, F, N2, N1);
12626
12627 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12628 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12629 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12630 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12631 TLI.getBooleanContents(N0.getValueType()) ==
12632 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12633 return DAG.getNode(
12634 ISD::ADD, DL, N1.getValueType(), N2,
12635 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12636 }
12637
12638 // Canonicalize integer abs.
12639 // vselect (setg[te] X, 0), X, -X ->
12640 // vselect (setgt X, -1), X, -X ->
12641 // vselect (setl[te] X, 0), -X, X ->
12642 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12643 if (N0.getOpcode() == ISD::SETCC) {
12644 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12645 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12646 bool isAbs = false;
12647 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12648
12649 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12650 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12651 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12652 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12653 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12654 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12655 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12656
12657 if (isAbs) {
12658 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12659 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12660
12661 SDValue Shift = DAG.getNode(
12662 ISD::SRA, DL, VT, LHS,
12663 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12664 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12665 AddToWorklist(Shift.getNode());
12666 AddToWorklist(Add.getNode());
12667 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12668 }
12669
12670 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12671 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12672 //
12673 // This is OK if we don't care about what happens if either operand is a
12674 // NaN.
12675 //
12676 if (N0.hasOneUse() &&
12677 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12678 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12679 return FMinMax;
12680 }
12681
12682 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12683 return S;
12684 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12685 return S;
12686
12687 // If this select has a condition (setcc) with narrower operands than the
12688 // select, try to widen the compare to match the select width.
12689 // TODO: This should be extended to handle any constant.
12690 // TODO: This could be extended to handle non-loading patterns, but that
12691 // requires thorough testing to avoid regressions.
12692 if (isNullOrNullSplat(RHS)) {
12693 EVT NarrowVT = LHS.getValueType();
12694 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12695 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12696 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12697 unsigned WideWidth = WideVT.getScalarSizeInBits();
12698 bool IsSigned = isSignedIntSetCC(CC);
12699 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12700 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12701 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12702 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12703 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12704 // Both compare operands can be widened for free. The LHS can use an
12705 // extended load, and the RHS is a constant:
12706 // vselect (ext (setcc load(X), C)), N1, N2 -->
12707 // vselect (setcc extload(X), C'), N1, N2
12708 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12709 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12710 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12711 EVT WideSetCCVT = getSetCCResultType(WideVT);
12712 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12713 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12714 }
12715 }
12716
12717 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
12718 return ABD;
12719
12720 // Match VSELECTs into add with unsigned saturation.
12721 if (hasOperation(ISD::UADDSAT, VT)) {
12722 // Check if one of the arms of the VSELECT is vector with all bits set.
12723 // If it's on the left side invert the predicate to simplify logic below.
12724 SDValue Other;
12725 ISD::CondCode SatCC = CC;
12726 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12727 Other = N2;
12728 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12729 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12730 Other = N1;
12731 }
12732
12733 if (Other && Other.getOpcode() == ISD::ADD) {
12734 SDValue CondLHS = LHS, CondRHS = RHS;
12735 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12736
12737 // Canonicalize condition operands.
12738 if (SatCC == ISD::SETUGE) {
12739 std::swap(CondLHS, CondRHS);
12740 SatCC = ISD::SETULE;
12741 }
12742
12743 // We can test against either of the addition operands.
12744 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12745 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12746 if (SatCC == ISD::SETULE && Other == CondRHS &&
12747 (OpLHS == CondLHS || OpRHS == CondLHS))
12748 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12749
12750 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12751 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12752 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12753 CondLHS == OpLHS) {
12754 // If the RHS is a constant we have to reverse the const
12755 // canonicalization.
12756 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12757 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12758 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12759 };
12760 if (SatCC == ISD::SETULE &&
12761 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12762 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12763 }
12764 }
12765 }
12766
12767 // Match VSELECTs into sub with unsigned saturation.
12768 if (hasOperation(ISD::USUBSAT, VT)) {
12769 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12770 // the left side invert the predicate to simplify logic below.
12771 SDValue Other;
12772 ISD::CondCode SatCC = CC;
12773 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12774 Other = N2;
12775 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12776 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12777 Other = N1;
12778 }
12779
12780 // zext(x) >= y ? trunc(zext(x) - y) : 0
12781 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12782 // zext(x) > y ? trunc(zext(x) - y) : 0
12783 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12784 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12785 Other.getOperand(0).getOpcode() == ISD::SUB &&
12786 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12787 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12788 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12789 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12790 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12791 DAG, DL))
12792 return R;
12793 }
12794
12795 if (Other && Other.getNumOperands() == 2) {
12796 SDValue CondRHS = RHS;
12797 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12798
12799 if (OpLHS == LHS) {
12800 // Look for a general sub with unsigned saturation first.
12801 // x >= y ? x-y : 0 --> usubsat x, y
12802 // x > y ? x-y : 0 --> usubsat x, y
12803 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12804 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12805 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12806
12807 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12808 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12809 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12810 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12811 // If the RHS is a constant we have to reverse the const
12812 // canonicalization.
12813 // x > C-1 ? x+-C : 0 --> usubsat x, C
12814 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12815 return (!Op && !Cond) ||
12816 (Op && Cond &&
12817 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12818 };
12819 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12820 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12821 /*AllowUndefs*/ true)) {
12822 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12823 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12824 }
12825
12826 // Another special case: If C was a sign bit, the sub has been
12827 // canonicalized into a xor.
12828 // FIXME: Would it be better to use computeKnownBits to
12829 // determine whether it's safe to decanonicalize the xor?
12830 // x s< 0 ? x^C : 0 --> usubsat x, C
12831 APInt SplatValue;
12832 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12833 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12834 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12835 SplatValue.isSignMask()) {
12836 // Note that we have to rebuild the RHS constant here to
12837 // ensure we don't rely on particular values of undef lanes.
12838 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12839 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12840 }
12841 }
12842 }
12843 }
12844 }
12845 }
12846 }
12847
12848 if (SimplifySelectOps(N, N1, N2))
12849 return SDValue(N, 0); // Don't revisit N.
12850
12851 // Fold (vselect all_ones, N1, N2) -> N1
12852 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12853 return N1;
12854 // Fold (vselect all_zeros, N1, N2) -> N2
12855 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12856 return N2;
12857
12858 // The ConvertSelectToConcatVector function is assuming both the above
12859 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12860 // and addressed.
12861 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12862 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12863 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12864 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12865 return CV;
12866 }
12867
12868 if (SDValue V = foldVSelectOfConstants(N))
12869 return V;
12870
12871 if (hasOperation(ISD::SRA, VT))
12872 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12873 return V;
12874
12875 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12876 return SDValue(N, 0);
12877
12878 return SDValue();
12879}
12880
12881SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12882 SDValue N0 = N->getOperand(0);
12883 SDValue N1 = N->getOperand(1);
12884 SDValue N2 = N->getOperand(2);
12885 SDValue N3 = N->getOperand(3);
12886 SDValue N4 = N->getOperand(4);
12887 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12888 SDLoc DL(N);
12889
12890 // fold select_cc lhs, rhs, x, x, cc -> x
12891 if (N2 == N3)
12892 return N2;
12893
12894 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12895 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12896 isNullConstant(N1))
12897 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12898
12899 // Determine if the condition we're dealing with is constant
12900 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12901 CC, DL, false)) {
12902 AddToWorklist(SCC.getNode());
12903
12904 // cond always true -> true val
12905 // cond always false -> false val
12906 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12907 return SCCC->isZero() ? N3 : N2;
12908
12909 // When the condition is UNDEF, just return the first operand. This is
12910 // coherent with DAG creation; no setcc node is created in this case.
12911 if (SCC->isUndef())
12912 return N2;
12913
12914 // Fold to a simpler select_cc
12915 if (SCC.getOpcode() == ISD::SETCC) {
12916 SDValue SelectOp =
12917 DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12918 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12919 SelectOp->setFlags(SCC->getFlags());
12920 return SelectOp;
12921 }
12922 }
12923
12924 // If we can fold this based on the true/false value, do so.
12925 if (SimplifySelectOps(N, N2, N3))
12926 return SDValue(N, 0); // Don't revisit N.
12927
12928 // fold select_cc into other things, such as min/max/abs
12929 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12930}
12931
12932SDValue DAGCombiner::visitSETCC(SDNode *N) {
12933 // setcc is very commonly used as an argument to brcond. This pattern
12934 // also lends itself to numerous combines and, as a result, it is desirable
12935 // to keep the argument to a brcond as a setcc as much as possible.
12936 bool PreferSetCC =
12937 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
12938
12939 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12940 EVT VT = N->getValueType(0);
12941 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12942 SDLoc DL(N);
12943
12944 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12945 // If we prefer to have a setcc, and we don't, we'll try our best to
12946 // recreate one using rebuildSetCC.
12947 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12948 SDValue NewSetCC = rebuildSetCC(Combined);
12949
12950 // We don't have anything interesting to combine to.
12951 if (NewSetCC.getNode() == N)
12952 return SDValue();
12953
12954 if (NewSetCC)
12955 return NewSetCC;
12956 }
12957 return Combined;
12958 }
12959
12960 // Optimize
12961 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12962 // or
12963 // 2) (icmp eq/ne X, (rotate X, C1))
12964 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12965 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`).
12966 // Then:
12967 // If C1 is a power of 2, then the rotate and shift+and versions are
12968 // equivalent, so we can interchange them depending on target preference.
12969 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12970 // which in turn affects the constant C0. We can use this to get better
12971 // constants again determined by target preference.
12972 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12973 auto IsAndWithShift = [](SDValue A, SDValue B) {
12974 return A.getOpcode() == ISD::AND &&
12975 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12976 A.getOperand(0) == B.getOperand(0);
12977 };
12978 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12979 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12980 B.getOperand(0) == A;
12981 };
12982 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12983 bool IsRotate = false;
12984
12985 // Find either shift+and or rotate pattern.
12986 if (IsAndWithShift(N0, N1)) {
12987 AndOrOp = N0;
12988 ShiftOrRotate = N1;
12989 } else if (IsAndWithShift(N1, N0)) {
12990 AndOrOp = N1;
12991 ShiftOrRotate = N0;
12992 } else if (IsRotateWithOp(N0, N1)) {
12993 IsRotate = true;
12994 AndOrOp = N0;
12995 ShiftOrRotate = N1;
12996 } else if (IsRotateWithOp(N1, N0)) {
12997 IsRotate = true;
12998 AndOrOp = N1;
12999 ShiftOrRotate = N0;
13000 }
13001
13002 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13003 (IsRotate || AndOrOp.hasOneUse())) {
13004 EVT OpVT = N0.getValueType();
13005 // Get constant shift/rotate amount and possibly mask (if its shift+and
13006 // variant).
13007 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13008 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13009 /*AllowTrunc*/ false);
13010 if (CNode == nullptr)
13011 return std::nullopt;
13012 return CNode->getAPIntValue();
13013 };
13014 std::optional<APInt> AndCMask =
13015 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13016 std::optional<APInt> ShiftCAmt =
13017 GetAPIntValue(ShiftOrRotate.getOperand(1));
13018 unsigned NumBits = OpVT.getScalarSizeInBits();
13019
13020 // We found constants.
13021 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13022 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13023 // Check that the constants meet the constraints.
13024 bool CanTransform = IsRotate;
13025 if (!CanTransform) {
13026 // Check that the mask and the shift amount complement each other
13027 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13028 // Check that we are comparing all bits
13029 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13030 // Check that the and mask is correct for the shift
13031 CanTransform &=
13032 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13033 }
13034
13035 // See if target prefers another shift/rotate opcode.
13036 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13037 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13038 // Transform is valid and we have a new preference.
13039 if (CanTransform && NewShiftOpc != ShiftOpc) {
13040 SDValue NewShiftOrRotate =
13041 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13042 ShiftOrRotate.getOperand(1));
13043 SDValue NewAndOrOp = SDValue();
13044
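// When switching between SHL and SRL, the AND mask has to be recomputed so that it
// still selects exactly the bits not covered by the new shift amount.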
13045 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13046 APInt NewMask =
13047 NewShiftOpc == ISD::SHL
13048 ? APInt::getHighBitsSet(NumBits,
13049 NumBits - ShiftCAmt->getZExtValue())
13050 : APInt::getLowBitsSet(NumBits,
13051 NumBits - ShiftCAmt->getZExtValue());
13052 NewAndOrOp =
13053 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13054 DAG.getConstant(NewMask, DL, OpVT));
13055 } else {
13056 NewAndOrOp = ShiftOrRotate.getOperand(0);
13057 }
13058
13059 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13060 }
13061 }
13062 }
13063 }
13064 return SDValue();
13065}
13066
13067SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13068 SDValue LHS = N->getOperand(0);
13069 SDValue RHS = N->getOperand(1);
13070 SDValue Carry = N->getOperand(2);
13071 SDValue Cond = N->getOperand(3);
13072
13073 // If Carry is false, fold to a regular SETCC.
13074 if (isNullConstant(Carry))
13075 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13076
13077 return SDValue();
13078}
13079
13080/// Check if N satisfies:
13081/// N is used once.
13082/// N is a Load.
13083 /// The load is compatible with ExtOpcode. This means that if the load has
13084 /// an explicit zero/sign extension, ExtOpcode must perform the same kind of
13085 /// extension.
13086 /// Otherwise the load is considered compatible.
13087static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13088 if (!N.hasOneUse())
13089 return false;
13090
13091 if (!isa<LoadSDNode>(N))
13092 return false;
13093
13094 LoadSDNode *Load = cast<LoadSDNode>(N);
13095 ISD::LoadExtType LoadExt = Load->getExtensionType();
13096 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13097 return true;
13098
13099 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13100 // extension.
13101 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13102 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13103 return false;
13104
13105 return true;
13106}
13107
13108/// Fold
13109/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13110/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13111/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13112/// This function is called by the DAGCombiner when visiting sext/zext/aext
13113/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13114 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13115 SelectionDAG &DAG, const SDLoc &DL,
13116 CombineLevel Level) {
13117 unsigned Opcode = N->getOpcode();
13118 SDValue N0 = N->getOperand(0);
13119 EVT VT = N->getValueType(0);
13120 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13121 Opcode == ISD::ANY_EXTEND) &&
13122 "Expected EXTEND dag node in input!");
13123
13124 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13125 !N0.hasOneUse())
13126 return SDValue();
13127
13128 SDValue Op1 = N0->getOperand(1);
13129 SDValue Op2 = N0->getOperand(2);
13130 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13131 return SDValue();
13132
13133 auto ExtLoadOpcode = ISD::EXTLOAD;
13134 if (Opcode == ISD::SIGN_EXTEND)
13135 ExtLoadOpcode = ISD::SEXTLOAD;
13136 else if (Opcode == ISD::ZERO_EXTEND)
13137 ExtLoadOpcode = ISD::ZEXTLOAD;
13138
13139 // An illegal VSELECT may fail instruction selection if it appears after
13140 // legalization (DAGCombine2), so conservatively check the OperationAction.
13141 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13142 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13143 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13144 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13145 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13146 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13147 return SDValue();
13148
13149 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13150 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13151 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13152}
13153
13154/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13155/// a build_vector of constants.
13156/// This function is called by the DAGCombiner when visiting sext/zext/aext
13157/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13158/// Vector extends are not folded if operations are legal; this is to
13159/// avoid introducing illegal build_vector dag nodes.
13160 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13161 const TargetLowering &TLI,
13162 SelectionDAG &DAG, bool LegalTypes) {
13163 unsigned Opcode = N->getOpcode();
13164 SDValue N0 = N->getOperand(0);
13165 EVT VT = N->getValueType(0);
13166
13167 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13168 "Expected EXTEND dag node in input!");
13169
13170 // fold (sext c1) -> c1
13171 // fold (zext c1) -> c1
13172 // fold (aext c1) -> c1
13173 if (isa<ConstantSDNode>(N0))
13174 return DAG.getNode(Opcode, DL, VT, N0);
13175
13176 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13177 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13178 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13179 if (N0->getOpcode() == ISD::SELECT) {
13180 SDValue Op1 = N0->getOperand(1);
13181 SDValue Op2 = N0->getOperand(2);
13182 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13183 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13184 // For any_extend, choose sign extension of the constants to allow a
13185 // possible further transform to sign_extend_inreg, i.e.
13186 //
13187 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13188 // t2: i64 = any_extend t1
13189 // -->
13190 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13191 // -->
13192 // t4: i64 = sign_extend_inreg t3
13193 unsigned FoldOpc = Opcode;
13194 if (FoldOpc == ISD::ANY_EXTEND)
13195 FoldOpc = ISD::SIGN_EXTEND;
13196 return DAG.getSelect(DL, VT, N0->getOperand(0),
13197 DAG.getNode(FoldOpc, DL, VT, Op1),
13198 DAG.getNode(FoldOpc, DL, VT, Op2));
13199 }
13200 }
13201
13202 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13203 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13204 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13205 EVT SVT = VT.getScalarType();
13206 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13207 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13208 return SDValue();
13209
13210 // We can fold this node into a build_vector.
13211 unsigned VTBits = SVT.getSizeInBits();
13212 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13213 SmallVector<SDValue, 8> Elts;
13214 unsigned NumElts = VT.getVectorNumElements();
13215
13216 for (unsigned i = 0; i != NumElts; ++i) {
13217 SDValue Op = N0.getOperand(i);
13218 if (Op.isUndef()) {
13219 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13220 Elts.push_back(DAG.getUNDEF(SVT));
13221 else
13222 Elts.push_back(DAG.getConstant(0, DL, SVT));
13223 continue;
13224 }
13225
13226 SDLoc DL(Op);
13227 // Get the constant value and if needed trunc it to the size of the type.
13228 // Nodes like build_vector might have constants wider than the scalar type.
13229 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13230 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13231 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13232 else
13233 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13234 }
13235
13236 return DAG.getBuildVector(VT, DL, Elts);
13237}
13238
13239// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13240// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13241 // transformation. Returns true if extensions are possible and the above
13242// mentioned transformation is profitable.
13243 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13244 unsigned ExtOpc,
13245 SmallVectorImpl<SDNode *> &ExtendNodes,
13246 const TargetLowering &TLI) {
13247 bool HasCopyToRegUses = false;
13248 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13249 for (SDUse &Use : N0->uses()) {
13250 SDNode *User = Use.getUser();
13251 if (User == N)
13252 continue;
13253 if (Use.getResNo() != N0.getResNo())
13254 continue;
13255 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13256 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13257 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13258 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13259 // Sign bits will be lost after a zext.
13260 return false;
13261 bool Add = false;
13262 for (unsigned i = 0; i != 2; ++i) {
13263 SDValue UseOp = User->getOperand(i);
13264 if (UseOp == N0)
13265 continue;
13266 if (!isa<ConstantSDNode>(UseOp))
13267 return false;
13268 Add = true;
13269 }
13270 if (Add)
13271 ExtendNodes.push_back(User);
13272 continue;
13273 }
13274 // If truncates aren't free and there are users we can't
13275 // extend, it isn't worthwhile.
13276 if (!isTruncFree)
13277 return false;
13278 // Remember if this value is live-out.
13279 if (User->getOpcode() == ISD::CopyToReg)
13280 HasCopyToRegUses = true;
13281 }
13282
13283 if (HasCopyToRegUses) {
13284 bool BothLiveOut = false;
13285 for (SDUse &Use : N->uses()) {
13286 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13287 BothLiveOut = true;
13288 break;
13289 }
13290 }
13291 if (BothLiveOut)
13292 // Both unextended and extended values are live out. There had better be
13293 // a good reason for the transformation.
13294 return !ExtendNodes.empty();
13295 }
13296 return true;
13297}
13298
13299void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13300 SDValue OrigLoad, SDValue ExtLoad,
13301 ISD::NodeType ExtType) {
13302 // Extend SetCC uses if necessary.
13303 SDLoc DL(ExtLoad);
13304 for (SDNode *SetCC : SetCCs) {
13305 SmallVector<SDValue, 4> Ops;
13306
13307 for (unsigned j = 0; j != 2; ++j) {
13308 SDValue SOp = SetCC->getOperand(j);
13309 if (SOp == OrigLoad)
13310 Ops.push_back(ExtLoad);
13311 else
13312 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13313 }
13314
13315 Ops.push_back(SetCC->getOperand(2));
13316 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13317 }
13318}
13319
13320// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13321SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13322 SDValue N0 = N->getOperand(0);
13323 EVT DstVT = N->getValueType(0);
13324 EVT SrcVT = N0.getValueType();
13325
13326 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13327 N->getOpcode() == ISD::ZERO_EXTEND) &&
13328 "Unexpected node type (not an extend)!");
13329
13330 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13331 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13332 // (v8i32 (sext (v8i16 (load x))))
13333 // into:
13334 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13335 // (v4i32 (sextload (x + 16)))))
13336 // Where uses of the original load, i.e.:
13337 // (v8i16 (load x))
13338 // are replaced with:
13339 // (v8i16 (truncate
13340 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13341 // (v4i32 (sextload (x + 16)))))))
13342 //
13343 // This combine is only applicable to illegal, but splittable, vectors.
13344 // All legal types, and illegal non-vector types, are handled elsewhere.
13345 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13346 //
13347 if (N0->getOpcode() != ISD::LOAD)
13348 return SDValue();
13349
13350 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13351
13352 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13353 !N0.hasOneUse() || !LN0->isSimple() ||
13354 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13355 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13356 return SDValue();
13357
13358 SmallVector<SDNode *, 4> SetCCs;
13359 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13360 return SDValue();
13361
13362 ISD::LoadExtType ExtType =
13363 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13364
13365 // Try to split the vector types to get down to legal types.
13366 EVT SplitSrcVT = SrcVT;
13367 EVT SplitDstVT = DstVT;
13368 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13369 SplitSrcVT.getVectorNumElements() > 1) {
13370 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13371 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13372 }
13373
13374 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13375 return SDValue();
13376
13377 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13378
13379 SDLoc DL(N);
13380 const unsigned NumSplits =
13381 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13382 const unsigned Stride = SplitSrcVT.getStoreSize();
13383 SmallVector<SDValue, 4> Loads;
13384 SmallVector<SDValue, 4> Chains;
13385
13386 SDValue BasePtr = LN0->getBasePtr();
13387 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13388 const unsigned Offset = Idx * Stride;
13389
13390 SDValue SplitLoad =
13391 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13392 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13393 SplitSrcVT, LN0->getOriginalAlign(),
13394 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13395
13396 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13397
13398 Loads.push_back(SplitLoad.getValue(0));
13399 Chains.push_back(SplitLoad.getValue(1));
13400 }
13401
13402 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13403 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13404
13405 // Simplify TF.
13406 AddToWorklist(NewChain.getNode());
13407
13408 CombineTo(N, NewValue);
13409
13410 // Replace uses of the original load (before extension)
13411 // with a truncate of the concatenated sextloaded vectors.
13412 SDValue Trunc =
13413 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13414 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13415 CombineTo(N0.getNode(), Trunc, NewChain);
13416 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13417}
13418
13419// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13420// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13421SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13422 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13423 EVT VT = N->getValueType(0);
13424 EVT OrigVT = N->getOperand(0).getValueType();
13425 if (TLI.isZExtFree(OrigVT, VT))
13426 return SDValue();
13427
13428 // and/or/xor
13429 SDValue N0 = N->getOperand(0);
13430 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13431 N0.getOperand(1).getOpcode() != ISD::Constant ||
13432 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13433 return SDValue();
13434
13435 // shl/shr
13436 SDValue N1 = N0->getOperand(0);
13437 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13438 N1.getOperand(1).getOpcode() != ISD::Constant ||
13439 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13440 return SDValue();
13441
13442 // load
13443 if (!isa<LoadSDNode>(N1.getOperand(0)))
13444 return SDValue();
13445 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13446 EVT MemVT = Load->getMemoryVT();
13447 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13448 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13449 return SDValue();
13450
13451
13452 // If the shift op is SHL, the logic op must be AND, otherwise the result
13453 // will be wrong.
13454 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13455 return SDValue();
13456
13457 if (!N0.hasOneUse() || !N1.hasOneUse())
13458 return SDValue();
13459
13460 SmallVector<SDNode *, 4> SetCCs;
13461 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13462 ISD::ZERO_EXTEND, SetCCs, TLI))
13463 return SDValue();
13464
13465 // Actually do the transformation.
13466 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13467 Load->getChain(), Load->getBasePtr(),
13468 Load->getMemoryVT(), Load->getMemOperand());
13469
13470 SDLoc DL1(N1);
13471 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13472 N1.getOperand(1));
13473
13474 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13475 SDLoc DL0(N0);
13476 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13477 DAG.getConstant(Mask, DL0, VT));
13478
13479 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13480 CombineTo(N, And);
13481 if (SDValue(Load, 0).hasOneUse()) {
13482 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13483 } else {
13484 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13485 Load->getValueType(0), ExtLoad);
13486 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13487 }
13488
13489 // N0 is dead at this point.
13490 recursivelyDeleteUnusedNodes(N0.getNode());
13491
13492 return SDValue(N,0); // Return N so it doesn't get rechecked!
13493}
13494
13495/// If we're narrowing or widening the result of a vector select and the final
13496/// size is the same size as a setcc (compare) feeding the select, then try to
13497/// apply the cast operation to the select's operands because matching vector
13498/// sizes for a select condition and other operands should be more efficient.
13499SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13500 unsigned CastOpcode = Cast->getOpcode();
13501 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13502 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13503 CastOpcode == ISD::FP_ROUND) &&
13504 "Unexpected opcode for vector select narrowing/widening");
13505
13506 // We only do this transform before legal ops because the pattern may be
13507 // obfuscated by target-specific operations after legalization. Do not create
13508 // an illegal select op, however, because that may be difficult to lower.
13509 EVT VT = Cast->getValueType(0);
13510 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13511 return SDValue();
13512
13513 SDValue VSel = Cast->getOperand(0);
13514 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13515 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13516 return SDValue();
13517
13518 // Does the setcc have the same vector size as the casted select?
13519 SDValue SetCC = VSel.getOperand(0);
13520 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13521 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13522 return SDValue();
13523
13524 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13525 SDValue A = VSel.getOperand(1);
13526 SDValue B = VSel.getOperand(2);
13527 SDValue CastA, CastB;
13528 SDLoc DL(Cast);
13529 if (CastOpcode == ISD::FP_ROUND) {
13530 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13531 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13532 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13533 } else {
13534 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13535 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13536 }
13537 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13538}
13539
13540// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13541// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13542 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13543 const TargetLowering &TLI, EVT VT,
13544 bool LegalOperations, SDNode *N,
13545 SDValue N0, ISD::LoadExtType ExtLoadType) {
13546 SDNode *N0Node = N0.getNode();
13547 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13548 : ISD::isZEXTLoad(N0Node);
13549 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13550 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13551 return SDValue();
13552
13553 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13554 EVT MemVT = LN0->getMemoryVT();
13555 if ((LegalOperations || !LN0->isSimple() ||
13556 VT.isVector()) &&
13557 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13558 return SDValue();
13559
13560 SDValue ExtLoad =
13561 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13562 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13563 Combiner.CombineTo(N, ExtLoad);
13564 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13565 if (LN0->use_empty())
13566 Combiner.recursivelyDeleteUnusedNodes(LN0);
13567 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13568}
13569
13570// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13571// Only generate vector extloads when 1) they're legal, and 2) they are
13572// deemed desirable by the target. NonNegZExt can be set to true if a zero
13573// extend has the nonneg flag to allow use of sextload if profitable.
13574 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13575 const TargetLowering &TLI, EVT VT,
13576 bool LegalOperations, SDNode *N, SDValue N0,
13577 ISD::LoadExtType ExtLoadType,
13578 ISD::NodeType ExtOpc,
13579 bool NonNegZExt = false) {
13580 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13581 return {};
13582
13583 // If this is zext nneg, see if it would make sense to treat it as a sext.
13584 if (NonNegZExt) {
13585 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13586 "Unexpected load type or opcode");
13587 for (SDNode *User : N0->users()) {
13588 if (User->getOpcode() == ISD::SETCC) {
13589 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13590 if (ISD::isSignedIntSetCC(CC)) {
13591 ExtLoadType = ISD::SEXTLOAD;
13592 ExtOpc = ISD::SIGN_EXTEND;
13593 break;
13594 }
13595 }
13596 }
13597 }
13598
13599 // TODO: isFixedLengthVector() should be removed, with any negative effects on
13600 // code generation treated as the result of that target's implementation of
13601 // isVectorLoadExtDesirable().
13602 if ((LegalOperations || VT.isFixedLengthVector() ||
13603 !cast<LoadSDNode>(N0)->isSimple()) &&
13604 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13605 return {};
13606
13607 bool DoXform = true;
13608 SmallVector<SDNode *, 4> SetCCs;
13609 if (!N0.hasOneUse())
13610 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13611 if (VT.isVector())
13612 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13613 if (!DoXform)
13614 return {};
13615
13616 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13617 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13618 LN0->getBasePtr(), N0.getValueType(),
13619 LN0->getMemOperand());
13620 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13621 // If the load value is used only by N, replace it via CombineTo N.
13622 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13623 Combiner.CombineTo(N, ExtLoad);
13624 if (NoReplaceTrunc) {
13625 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13626 Combiner.recursivelyDeleteUnusedNodes(LN0);
13627 } else {
13628 SDValue Trunc =
13629 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13630 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13631 }
13632 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13633}
13634
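// fold ([s|z]ext (masked_load x)) -> ([s|z]ext masked_load x) by emitting an extending
// masked load directly when the target considers that legal (or custom) and desirable.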
13635static SDValue
13636 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13637 bool LegalOperations, SDNode *N, SDValue N0,
13638 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13639 if (!N0.hasOneUse())
13640 return SDValue();
13641
13642 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13643 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13644 return SDValue();
13645
13646 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13647 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13648 return SDValue();
13649
13650 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13651 return SDValue();
13652
13653 SDLoc dl(Ld);
13654 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13655 SDValue NewLoad = DAG.getMaskedLoad(
13656 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13657 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13658 ExtLoadType, Ld->isExpandingLoad());
13659 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13660 return NewLoad;
13661}
13662
13663// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13664 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13665 const TargetLowering &TLI, EVT VT,
13666 SDValue N0,
13667 ISD::LoadExtType ExtLoadType) {
13668 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13669 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13670 return {};
13671 EVT MemoryVT = ALoad->getMemoryVT();
13672 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13673 return {};
13674 // Can't fold into ALoad if it is already extending differently.
13675 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13676 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13677 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13678 return {};
13679
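// Rebuild the atomic load with the wider result type and the requested extension kind;
// existing users of the narrow value are redirected to a truncate of the new load.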
13680 EVT OrigVT = ALoad->getValueType(0);
13681 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13682 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13683 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13684 ALoad->getBasePtr(), ALoad->getMemOperand()));
13685 NewALoad->setExtensionType(ExtLoadType);
13686 DAG.ReplaceAllUsesOfValueWith(
13687 SDValue(ALoad, 0),
13688 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13689 // Update the chain uses.
13690 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13691 return SDValue(NewALoad, 0);
13692}
13693
13694 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13695 bool LegalOperations) {
13696 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13697 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13698
13699 SDValue SetCC = N->getOperand(0);
13700 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13701 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13702 return SDValue();
13703
13704 SDValue X = SetCC.getOperand(0);
13705 SDValue Ones = SetCC.getOperand(1);
13706 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13707 EVT VT = N->getValueType(0);
13708 EVT XVT = X.getValueType();
13709 // setge X, C is canonicalized to setgt, so we do not need to match that
13710 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13711 // not require the 'not' op.
13712 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13713 // Invert and smear/shift the sign bit:
13714 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13715 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13716 SDLoc DL(N);
13717 unsigned ShCt = VT.getSizeInBits() - 1;
13718 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13719 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13720 SDValue NotX = DAG.getNOT(DL, X, VT);
13721 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13722 auto ShiftOpcode =
13723 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13724 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13725 }
13726 }
13727 return SDValue();
13728}
13729
13730SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13731 SDValue N0 = N->getOperand(0);
13732 if (N0.getOpcode() != ISD::SETCC)
13733 return SDValue();
13734
13735 SDValue N00 = N0.getOperand(0);
13736 SDValue N01 = N0.getOperand(1);
13737 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13738 EVT VT = N->getValueType(0);
13739 EVT N00VT = N00.getValueType();
13740 SDLoc DL(N);
13741
13742 // Propagate fast-math-flags.
13743 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13744
13745 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13746 // the same size as the compared operands. Try to optimize sext(setcc())
13747 // if this is the case.
13748 if (VT.isVector() && !LegalOperations &&
13749 TLI.getBooleanContents(N00VT) ==
13750 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13751 EVT SVT = getSetCCResultType(N00VT);
13752
13753 // If we already have the desired type, don't change it.
13754 if (SVT != N0.getValueType()) {
13755 // We know that the # elements of the result is the same as the
13756 // # elements of the compare (and the # elements of the compare result
13757 // for that matter). Check to see that they are the same size. If so,
13758 // we know that the element size of the sext'd result matches the
13759 // element size of the compare operands.
13760 if (VT.getSizeInBits() == SVT.getSizeInBits())
13761 return DAG.getSetCC(DL, VT, N00, N01, CC);
13762
13763 // If the desired elements are smaller or larger than the source
13764 // elements, we can use a matching integer vector type and then
13765 // truncate/sign extend.
13766 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13767 if (SVT == MatchingVecType) {
13768 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13769 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13770 }
13771 }
13772
13773 // Try to eliminate the sext of a setcc by zexting the compare operands.
13774 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13775 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
13776 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13777 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13778 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13779
13780 // We have an unsupported narrow vector compare op that would be legal
13781 // if extended to the destination type. See if the compare operands
13782 // can be freely extended to the destination type.
13783 auto IsFreeToExtend = [&](SDValue V) {
13784 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13785 return true;
13786 // Match a simple, non-extended load that can be converted to a
13787 // legal {z/s}ext-load.
13788 // TODO: Allow widening of an existing {z/s}ext-load?
13789 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13790 ISD::isUNINDEXEDLoad(V.getNode()) &&
13791 cast<LoadSDNode>(V)->isSimple() &&
13792 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13793 return false;
13794
13795 // Non-chain users of this value must either be the setcc in this
13796 // sequence or extends that can be folded into the new {z/s}ext-load.
13797 for (SDUse &Use : V->uses()) {
13798 // Skip uses of the chain and the setcc.
13799 SDNode *User = Use.getUser();
13800 if (Use.getResNo() != 0 || User == N0.getNode())
13801 continue;
13802 // Extra users must have exactly the same cast we are about to create.
13803 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13804 // is enhanced similarly.
13805 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13806 return false;
13807 }
13808 return true;
13809 };
13810
13811 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13812 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13813 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13814 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13815 }
13816 }
13817 }
13818
13819 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13820 // Here, T can be 1 or -1, depending on the type of the setcc and
13821 // getBooleanContents().
13822 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13823
13824 // To determine the "true" side of the select, we need to know the high bit
13825 // of the value returned by the setcc if it evaluates to true.
13826 // If the type of the setcc is i1, then the true case of the select is just
13827 // sext(i1 1), that is, -1.
13828 // If the type of the setcc is larger (say, i8) then the value of the high
13829 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13830 // of the appropriate width.
13831 SDValue ExtTrueVal = (SetCCWidth == 1)
13832 ? DAG.getAllOnesConstant(DL, VT)
13833 : DAG.getBoolConstant(true, DL, VT, N00VT);
13834 SDValue Zero = DAG.getConstant(0, DL, VT);
13835 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13836 return SCC;
13837
13838 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13839 EVT SetCCVT = getSetCCResultType(N00VT);
13840 // Don't do this transform for i1 because there's a select transform
13841 // that would reverse it.
13842 // TODO: We should not do this transform at all without a target hook
13843 // because a sext is likely cheaper than a select?
13844 if (SetCCVT.getScalarSizeInBits() != 1 &&
13845 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13846 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13847 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13848 }
13849 }
13850
13851 return SDValue();
13852}
13853
13854SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13855 SDValue N0 = N->getOperand(0);
13856 EVT VT = N->getValueType(0);
13857 SDLoc DL(N);
13858
13859 if (VT.isVector())
13860 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13861 return FoldedVOp;
13862
13863 // sext(undef) = 0 because the top bits will all be the same.
13864 if (N0.isUndef())
13865 return DAG.getConstant(0, DL, VT);
13866
13867 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13868 return Res;
13869
13870 // fold (sext (sext x)) -> (sext x)
13871 // fold (sext (aext x)) -> (sext x)
13872 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13873 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13874
13875 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13876 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13877 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13878 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13879 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13880 N0.getOperand(0));
13881
13882 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13883 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13884 SDValue N00 = N0.getOperand(0);
13885 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13886 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13887 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13888 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13889 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13890 }
13891 }
13892
13893 if (N0.getOpcode() == ISD::TRUNCATE) {
13894 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13895 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13896 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13897 SDNode *oye = N0.getOperand(0).getNode();
13898 if (NarrowLoad.getNode() != N0.getNode()) {
13899 CombineTo(N0.getNode(), NarrowLoad);
13900 // CombineTo deleted the truncate, if needed, but not what's under it.
13901 AddToWorklist(oye);
13902 }
13903 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13904 }
13905
13906 // See if the value being truncated is already sign extended. If so, just
13907 // eliminate the trunc/sext pair.
13908 SDValue Op = N0.getOperand(0);
13909 unsigned OpBits = Op.getScalarValueSizeInBits();
13910 unsigned MidBits = N0.getScalarValueSizeInBits();
13911 unsigned DestBits = VT.getScalarSizeInBits();
13912
13913 if (N0->getFlags().hasNoSignedWrap() ||
13914 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13915 if (OpBits == DestBits) {
13916 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13917 // bits, it can be used as-is.
13918 return Op;
13919 }
13920
13921 if (OpBits < DestBits) {
13922 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13923 // bits, just sext from i32.
13924 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13925 }
13926
13927 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13928 // bits, just truncate to i32.
13929 SDNodeFlags Flags;
13930 Flags.setNoSignedWrap(true);
13931 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13932 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
13933 }
13934
13935 // fold (sext (truncate x)) -> (sextinreg x).
13936 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13937 N0.getValueType())) {
13938 if (OpBits < DestBits)
13939 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13940 else if (OpBits > DestBits)
13941 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13942 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13943 DAG.getValueType(N0.getValueType()));
13944 }
13945 }
13946
13947 // Try to simplify (sext (load x)).
13948 if (SDValue foldedExt =
13949 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13950 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13951 return foldedExt;
13952
13953 if (SDValue foldedExt =
13954 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13955 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13956 return foldedExt;
13957
13958 // fold (sext (load x)) to multiple smaller sextloads.
13959 // Only on illegal but splittable vectors.
13960 if (SDValue ExtLoad = CombineExtLoad(N))
13961 return ExtLoad;
13962
13963 // Try to simplify (sext (sextload x)).
13964 if (SDValue foldedExt = tryToFoldExtOfExtload(
13965 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13966 return foldedExt;
13967
13968 // Try to simplify (sext (atomic_load x)).
13969 if (SDValue foldedExt =
13970 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13971 return foldedExt;
13972
13973 // fold (sext (and/or/xor (load x), cst)) ->
13974 // (and/or/xor (sextload x), (sext cst))
13975 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13976 isa<LoadSDNode>(N0.getOperand(0)) &&
13977 N0.getOperand(1).getOpcode() == ISD::Constant &&
13978 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13979 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13980 EVT MemVT = LN00->getMemoryVT();
13981 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13982 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13983 SmallVector<SDNode *, 4> SetCCs;
13984 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13985 ISD::SIGN_EXTEND, SetCCs, TLI);
13986 if (DoXform) {
13987 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13988 LN00->getChain(), LN00->getBasePtr(),
13989 LN00->getMemoryVT(),
13990 LN00->getMemOperand());
13991 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13992 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13993 ExtLoad, DAG.getConstant(Mask, DL, VT));
13994 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13995 bool NoReplaceTruncAnd = !N0.hasOneUse();
13996 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13997 CombineTo(N, And);
13998 // If N0 has multiple uses, change other uses as well.
13999 if (NoReplaceTruncAnd) {
14000 SDValue TruncAnd =
14001 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14002 CombineTo(N0.getNode(), TruncAnd);
14003 }
14004 if (NoReplaceTrunc) {
14005 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14006 } else {
14007 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14008 LN00->getValueType(0), ExtLoad);
14009 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14010 }
14011 return SDValue(N,0); // Return N so it doesn't get rechecked!
14012 }
14013 }
14014 }
14015
14016 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14017 return V;
14018
14019 if (SDValue V = foldSextSetcc(N))
14020 return V;
14021
14022 // fold (sext x) -> (zext x) if the sign bit is known zero.
14023 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14024 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14025 DAG.SignBitIsZero(N0))
14026 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14027
14028 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14029 return NewVSel;
14030
14031 // Eliminate this sign extend by doing a negation in the destination type:
14032 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14033 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14034 isNullOrNullSplat(N0.getOperand(0)) &&
14035 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14036 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14037 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14038 return DAG.getNegative(Zext, DL, VT);
14039 }
14040 // Eliminate this sign extend by doing a decrement in the destination type:
14041 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14042 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14043 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14044 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14045 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14046 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14047 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14048 }
14049
14050 // fold sext (not i1 X) -> add (zext i1 X), -1
14051 // TODO: This could be extended to handle bool vectors.
14052 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14053 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14054 TLI.isOperationLegal(ISD::ADD, VT)))) {
14055 // If we can eliminate the 'not', the sext form should be better
14056 if (SDValue NewXor = visitXOR(N0.getNode())) {
14057 // Returning N0 is a form of in-visit replacement that may have
14058 // invalidated N0.
14059 if (NewXor.getNode() == N0.getNode()) {
14060 // Return SDValue here as the xor should have already been replaced in
14061 // this sext.
14062 return SDValue();
14063 }
14064
14065 // Return a new sext with the new xor.
14066 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14067 }
14068
14069 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14070 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14071 }
14072
14073 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14074 return Res;
14075
14076 return SDValue();
14077}
14078
14079/// Given an extending node with a pop-count operand, if the target does not
14080/// support a pop-count in the narrow source type but does support it in the
14081/// destination type, widen the pop-count to the destination type.
14082static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14083 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14084 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14085 "Expected extend op");
14086
14087 SDValue CtPop = Extend->getOperand(0);
14088 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14089 return SDValue();
14090
14091 EVT VT = Extend->getValueType(0);
14092 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14093 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14094 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14095 return SDValue();
14096
14097 // zext (ctpop X) --> ctpop (zext X)
14098 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14099 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14100}
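// For example (illustrative): on a target where CTPOP is legal or custom for
// i64 but not for i32, (i64 zext (i32 ctpop X)) is rewritten as
// (i64 ctpop (i64 zext X)).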
14101
14102// If we have (zext (abs X)) where X is a type that will be promoted by type
14103// legalization, convert to (abs (sext X)). But don't extend past a legal type.
14104static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14105 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14106
14107 EVT VT = Extend->getValueType(0);
14108 if (VT.isVector())
14109 return SDValue();
14110
14111 SDValue Abs = Extend->getOperand(0);
14112 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14113 return SDValue();
14114
14115 EVT AbsVT = Abs.getValueType();
14116 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14117 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14118 TargetLowering::TypePromoteInteger)
14119 return SDValue();
14120
14121 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14122
14123 SDValue SExt =
14124 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14125 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14126 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14127}
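// For example (illustrative): on a target that promotes i8 to i32,
// (i32 zext (i8 abs X)) becomes (i32 abs (i32 sext X)); the final
// zext/trunc back to the destination type folds away because VT == i32 here.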
14128
14129SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14130 SDValue N0 = N->getOperand(0);
14131 EVT VT = N->getValueType(0);
14132 SDLoc DL(N);
14133
14134 if (VT.isVector())
14135 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14136 return FoldedVOp;
14137
14138 // zext(undef) = 0
14139 if (N0.isUndef())
14140 return DAG.getConstant(0, DL, VT);
14141
14142 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14143 return Res;
14144
14145 // fold (zext (zext x)) -> (zext x)
14146 // fold (zext (aext x)) -> (zext x)
14147 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14148 SDNodeFlags Flags;
14149 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14150 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14151 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14152 }
14153
14154 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14155 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14156 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14157 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14158 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14159
14160 // fold (zext (truncate x)) -> (zext x) or
14161 // (zext (truncate x)) -> (truncate x)
14162 // This is valid when the truncated bits of x are already zero.
14163 SDValue Op;
14164 KnownBits Known;
14165 if (isTruncateOf(DAG, N0, Op, Known)) {
14166 APInt TruncatedBits =
14167 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14168 APInt(Op.getScalarValueSizeInBits(), 0) :
14169 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14170 N0.getScalarValueSizeInBits(),
14171 std::min(Op.getScalarValueSizeInBits(),
14172 VT.getScalarSizeInBits()));
14173 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14174 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14175 DAG.salvageDebugInfo(*N0.getNode());
14176
14177 return ZExtOrTrunc;
14178 }
14179 }
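// For example (illustrative): if x is an i64 whose bits [16, 32) are known to
// be zero, then (i32 zext (i16 trunc x)) can simply become (i32 trunc x),
// since the bits discarded by the truncate within the destination width are
// already zero.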
14180
14181 // fold (zext (truncate x)) -> (and x, mask)
14182 if (N0.getOpcode() == ISD::TRUNCATE) {
14183 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14184 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14185 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14186 SDNode *oye = N0.getOperand(0).getNode();
14187 if (NarrowLoad.getNode() != N0.getNode()) {
14188 CombineTo(N0.getNode(), NarrowLoad);
14189 // CombineTo deleted the truncate, if needed, but not what's under it.
14190 AddToWorklist(oye);
14191 }
14192 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14193 }
14194
14195 EVT SrcVT = N0.getOperand(0).getValueType();
14196 EVT MinVT = N0.getValueType();
14197
14198 if (N->getFlags().hasNonNeg()) {
14199 SDValue Op = N0.getOperand(0);
14200 unsigned OpBits = SrcVT.getScalarSizeInBits();
14201 unsigned MidBits = MinVT.getScalarSizeInBits();
14202 unsigned DestBits = VT.getScalarSizeInBits();
14203
14204 if (N0->getFlags().hasNoSignedWrap() ||
14205 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14206 if (OpBits == DestBits) {
14207 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14208 // bits, it can be used as-is.
14209 return Op;
14210 }
14211
14212 if (OpBits < DestBits) {
14213 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14214 // bits, just sext from i32.
14215 // FIXME: This can probably be ZERO_EXTEND nneg?
14216 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14217 }
14218
14219 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14220 // bits, just truncate to i32.
14221 SDNodeFlags Flags;
14222 Flags.setNoSignedWrap(true);
14223 Flags.setNoUnsignedWrap(true);
14224 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14225 }
14226 }
14227
14228 // Try to mask before the extension to avoid having to generate a larger mask,
14229 // possibly over several sub-vectors.
14230 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14231 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14232 TLI.isZExtFree(SrcVT, VT))) {
14233 SDValue Op = N0.getOperand(0);
14234 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14235 AddToWorklist(Op.getNode());
14236 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14237 // Transfer the debug info; the new node is equivalent to N0.
14238 DAG.transferDbgValues(N0, ZExtOrTrunc);
14239 return ZExtOrTrunc;
14240 }
14241 }
14242
14243 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14244 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14245 AddToWorklist(Op.getNode());
14246 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14247 // We may safely transfer the debug info describing the truncate node over
14248 // to the equivalent and operation.
14249 DAG.transferDbgValues(N0, And);
14250 return And;
14251 }
14252 }
14253
14254 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14255 // if either of the casts is not free.
14256 if (N0.getOpcode() == ISD::AND &&
14257 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14258 N0.getOperand(1).getOpcode() == ISD::Constant &&
14259 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14260 !TLI.isZExtFree(N0.getValueType(), VT))) {
14261 SDValue X = N0.getOperand(0).getOperand(0);
14262 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14263 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14264 return DAG.getNode(ISD::AND, DL, VT,
14265 X, DAG.getConstant(Mask, DL, VT));
14266 }
14267
14268 // Try to simplify (zext (load x)).
14269 if (SDValue foldedExt = tryToFoldExtOfLoad(
14270 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14271 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14272 return foldedExt;
14273
14274 if (SDValue foldedExt =
14275 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14276 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14277 return foldedExt;
14278
14279 // fold (zext (load x)) to multiple smaller zextloads.
14280 // Only on illegal but splittable vectors.
14281 if (SDValue ExtLoad = CombineExtLoad(N))
14282 return ExtLoad;
14283
14284 // Try to simplify (zext (atomic_load x)).
14285 if (SDValue foldedExt =
14286 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14287 return foldedExt;
14288
14289 // fold (zext (and/or/xor (load x), cst)) ->
14290 // (and/or/xor (zextload x), (zext cst))
14291 // Unless (and (load x) cst) will match as a zextload already and has
14292 // additional users, or the zext is already free.
14293 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14294 isa<LoadSDNode>(N0.getOperand(0)) &&
14295 N0.getOperand(1).getOpcode() == ISD::Constant &&
14296 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14297 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14298 EVT MemVT = LN00->getMemoryVT();
14299 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14300 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14301 bool DoXform = true;
14302 SmallVector<SDNode *, 4> SetCCs;
14303 if (!N0.hasOneUse()) {
14304 if (N0.getOpcode() == ISD::AND) {
14305 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14306 EVT LoadResultTy = AndC->getValueType(0);
14307 EVT ExtVT;
14308 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14309 DoXform = false;
14310 }
14311 }
14312 if (DoXform)
14313 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14314 ISD::ZERO_EXTEND, SetCCs, TLI);
14315 if (DoXform) {
14316 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14317 LN00->getChain(), LN00->getBasePtr(),
14318 LN00->getMemoryVT(),
14319 LN00->getMemOperand());
14320 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14321 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14322 ExtLoad, DAG.getConstant(Mask, DL, VT));
14323 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14324 bool NoReplaceTruncAnd = !N0.hasOneUse();
14325 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14326 CombineTo(N, And);
14327 // If N0 has multiple uses, change other uses as well.
14328 if (NoReplaceTruncAnd) {
14329 SDValue TruncAnd =
14330 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14331 CombineTo(N0.getNode(), TruncAnd);
14332 }
14333 if (NoReplaceTrunc) {
14334 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14335 } else {
14336 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14337 LN00->getValueType(0), ExtLoad);
14338 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14339 }
14340 return SDValue(N,0); // Return N so it doesn't get rechecked!
14341 }
14342 }
14343 }
14344
14345 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14346 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14347 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14348 return ZExtLoad;
14349
14350 // Try to simplify (zext (zextload x)).
14351 if (SDValue foldedExt = tryToFoldExtOfExtload(
14352 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14353 return foldedExt;
14354
14355 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14356 return V;
14357
14358 if (N0.getOpcode() == ISD::SETCC) {
14359 // Propagate fast-math-flags.
14360 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14361
14362 // Only do this before legalize for now.
14363 if (!LegalOperations && VT.isVector() &&
14364 N0.getValueType().getVectorElementType() == MVT::i1) {
14365 EVT N00VT = N0.getOperand(0).getValueType();
14366 if (getSetCCResultType(N00VT) == N0.getValueType())
14367 return SDValue();
14368
14369 // We know that the # elements of the result is the same as the #
14370 // elements of the compare (and the # elements of the compare result for
14371 // that matter). Check to see that they are the same size. If so, we know
14372 // that the element size of the sext'd result matches the element size of
14373 // the compare operands.
14374 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14375 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14376 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14377 N0.getOperand(1), N0.getOperand(2));
14378 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14379 }
14380
14381 // If the desired elements are smaller or larger than the source
14382 // elements we can use a matching integer vector type and then
14383 // truncate/any extend followed by zext_in_reg.
14384 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14385 SDValue VsetCC =
14386 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14387 N0.getOperand(1), N0.getOperand(2));
14388 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14389 N0.getValueType());
14390 }
14391
14392 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14393 EVT N0VT = N0.getValueType();
14394 EVT N00VT = N0.getOperand(0).getValueType();
14395 if (SDValue SCC = SimplifySelectCC(
14396 DL, N0.getOperand(0), N0.getOperand(1),
14397 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14398 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14399 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14400 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14401 }
14402
14403 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14404 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14405 !TLI.isZExtFree(N0, VT)) {
14406 SDValue ShVal = N0.getOperand(0);
14407 SDValue ShAmt = N0.getOperand(1);
14408 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14409 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14410 if (N0.getOpcode() == ISD::SHL) {
14411 // If the original shl may be shifting out bits, do not perform this
14412 // transformation.
14413 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14414 ShVal.getOperand(0).getValueSizeInBits();
14415 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14416 // If the shift is too large, then see if we can deduce that the
14417 // shift is safe anyway.
14418
14419 // Check if the bits being shifted out are known to be zero.
14420 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
14421 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
14422 return SDValue();
14423 }
14424 }
14425
14426 // Ensure that the shift amount is wide enough for the shifted value.
14427 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14428 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14429
14430 return DAG.getNode(N0.getOpcode(), DL, VT,
14431 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14432 }
14433 }
14434 }
14435
14436 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14437 return NewVSel;
14438
14439 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14440 return NewCtPop;
14441
14442 if (SDValue V = widenAbs(N, DAG))
14443 return V;
14444
14445 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14446 return Res;
14447
14448 // CSE zext nneg with sext if the zext is not free.
14449 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14450 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14451 if (CSENode)
14452 return SDValue(CSENode, 0);
14453 }
14454
14455 return SDValue();
14456}
14457
14458SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14459 SDValue N0 = N->getOperand(0);
14460 EVT VT = N->getValueType(0);
14461 SDLoc DL(N);
14462
14463 // aext(undef) = undef
14464 if (N0.isUndef())
14465 return DAG.getUNDEF(VT);
14466
14467 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14468 return Res;
14469
14470 // fold (aext (aext x)) -> (aext x)
14471 // fold (aext (zext x)) -> (zext x)
14472 // fold (aext (sext x)) -> (sext x)
14473 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14474 N0.getOpcode() == ISD::SIGN_EXTEND) {
14475 SDNodeFlags Flags;
14476 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14477 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14478 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14479 }
14480
14481 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14482 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14483 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14484 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14485 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14486 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14487 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14488
14489 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14490 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14491 if (N0.getOpcode() == ISD::TRUNCATE) {
14492 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14493 SDNode *oye = N0.getOperand(0).getNode();
14494 if (NarrowLoad.getNode() != N0.getNode()) {
14495 CombineTo(N0.getNode(), NarrowLoad);
14496 // CombineTo deleted the truncate, if needed, but not what's under it.
14497 AddToWorklist(oye);
14498 }
14499 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14500 }
14501 }
14502
14503 // fold (aext (truncate x))
14504 if (N0.getOpcode() == ISD::TRUNCATE)
14505 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14506
14507 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14508 // if the trunc is not free.
14509 if (N0.getOpcode() == ISD::AND &&
14510 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14511 N0.getOperand(1).getOpcode() == ISD::Constant &&
14512 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14513 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14514 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14515 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14516 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14517 }
14518
14519 // fold (aext (load x)) -> (aext (truncate (extload x)))
14520 // None of the supported targets knows how to perform load and any_ext
14521 // on vectors in one instruction, so attempt to fold to zext instead.
14522 if (VT.isVector()) {
14523 // Try to simplify (zext (load x)).
14524 if (SDValue foldedExt =
14525 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14526 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14527 return foldedExt;
14528 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14529 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14530 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14531 bool DoXform = true;
14532 SmallVector<SDNode *, 4> SetCCs;
14533 if (!N0.hasOneUse())
14534 DoXform =
14535 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14536 if (DoXform) {
14537 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14538 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14539 LN0->getBasePtr(), N0.getValueType(),
14540 LN0->getMemOperand());
14541 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14542 // If the load value is used only by N, replace it via CombineTo N.
14543 bool NoReplaceTrunc = N0.hasOneUse();
14544 CombineTo(N, ExtLoad);
14545 if (NoReplaceTrunc) {
14546 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14547 recursivelyDeleteUnusedNodes(LN0);
14548 } else {
14549 SDValue Trunc =
14550 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14551 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14552 }
14553 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14554 }
14555 }
14556
14557 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14558 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14559 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14560 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14561 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14562 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14563 ISD::LoadExtType ExtType = LN0->getExtensionType();
14564 EVT MemVT = LN0->getMemoryVT();
14565 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14566 SDValue ExtLoad =
14567 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14568 MemVT, LN0->getMemOperand());
14569 CombineTo(N, ExtLoad);
14570 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14571 recursivelyDeleteUnusedNodes(LN0);
14572 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14573 }
14574 }
14575
14576 if (N0.getOpcode() == ISD::SETCC) {
14577 // Propagate fast-math-flags.
14578 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14579
14580 // For vectors:
14581 // aext(setcc) -> vsetcc
14582 // aext(setcc) -> truncate(vsetcc)
14583 // aext(setcc) -> aext(vsetcc)
14584 // Only do this before legalize for now.
14585 if (VT.isVector() && !LegalOperations) {
14586 EVT N00VT = N0.getOperand(0).getValueType();
14587 if (getSetCCResultType(N00VT) == N0.getValueType())
14588 return SDValue();
14589
14590 // We know that the # elements of the result is the same as the
14591 // # elements of the compare (and the # elements of the compare result
14592 // for that matter). Check to see that they are the same size. If so,
14593 // we know that the element size of the sext'd result matches the
14594 // element size of the compare operands.
14595 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14596 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14597 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14598
14599 // If the desired elements are smaller or larger than the source
14600 // elements we can use a matching integer vector type and then
14601 // truncate/any extend
14602 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14603 SDValue VsetCC = DAG.getSetCC(
14604 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14605 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14606 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14607 }
14608
14609 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14610 if (SDValue SCC = SimplifySelectCC(
14611 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14612 DAG.getConstant(0, DL, VT),
14613 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14614 return SCC;
14615 }
14616
14617 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14618 return NewCtPop;
14619
14620 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14621 return Res;
14622
14623 return SDValue();
14624}
14625
14626SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14627 unsigned Opcode = N->getOpcode();
14628 SDValue N0 = N->getOperand(0);
14629 SDValue N1 = N->getOperand(1);
14630 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14631
14632 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14633 if (N0.getOpcode() == Opcode &&
14634 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14635 return N0;
14636
14637 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14638 N0.getOperand(0).getOpcode() == Opcode) {
14639 // We have an assert, truncate, assert sandwich. Make one stronger assert
14640 // by applying the smallest asserted type to the larger source value.
14641 // This eliminates the later assert:
14642 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14643 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14644 SDLoc DL(N);
14645 SDValue BigA = N0.getOperand(0);
14646 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14647 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14648 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14649 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14650 BigA.getOperand(0), MinAssertVTVal);
14651 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14652 }
14653
14654 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14655 // than X, just move the AssertZext in front of the truncate and drop the
14656 // AssertSExt.
14657 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14658 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14659 Opcode == ISD::AssertZext) {
14660 SDValue BigA = N0.getOperand(0);
14661 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14662 if (AssertVT.bitsLT(BigA_AssertVT)) {
14663 SDLoc DL(N);
14664 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14665 BigA.getOperand(0), N1);
14666 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14667 }
14668 }
14669
14670 return SDValue();
14671}
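// For example (illustrative, with X : i64): the node
// (AssertZext (i16 trunc (AssertSext X, i32)), i8) becomes
// (i16 trunc (AssertZext X, i8)); the zero-assertion on all bits above bit 7
// subsumes the wider sign-assertion, so the AssertSext can be dropped.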
14672
14673SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14674 SDLoc DL(N);
14675
14676 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14677 SDValue N0 = N->getOperand(0);
14678
14679 // Fold (assertalign (assertalign x, AL0), AL1) ->
14680 // (assertalign x, max(AL0, AL1))
14681 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14682 return DAG.getAssertAlign(DL, N0.getOperand(0),
14683 std::max(AL, AAN->getAlign()));
14684
14685 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14686 // this assert down to source operands so that those arithmetic ops could be
14687 // exposed to the DAG combining.
14688 switch (N0.getOpcode()) {
14689 default:
14690 break;
14691 case ISD::ADD:
14692 case ISD::SUB: {
14693 unsigned AlignShift = Log2(AL);
14694 SDValue LHS = N0.getOperand(0);
14695 SDValue RHS = N0.getOperand(1);
14696 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14697 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14698 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14699 if (LHSAlignShift < AlignShift)
14700 LHS = DAG.getAssertAlign(DL, LHS, AL);
14701 if (RHSAlignShift < AlignShift)
14702 RHS = DAG.getAssertAlign(DL, RHS, AL);
14703 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14704 }
14705 break;
14706 }
14707 }
14708
14709 return SDValue();
14710}
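// For example (illustrative): given (assertalign (add X, 16), align 8), the
// constant operand already has at least 3 known trailing zero bits, so the
// assertion is sunk onto X: (add (assertalign X, align 8), 16). This exposes
// the add to the usual pointer-arithmetic combines.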
14711
14712/// If the result of a load is shifted/masked/truncated to an effectively
14713/// narrower type, try to transform the load to a narrower type and/or
14714/// use an extending load.
14715SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14716 unsigned Opc = N->getOpcode();
14717
14719 SDValue N0 = N->getOperand(0);
14720 EVT VT = N->getValueType(0);
14721 EVT ExtVT = VT;
14722
14723 // This transformation isn't valid for vector loads.
14724 if (VT.isVector())
14725 return SDValue();
14726
14727 // The ShAmt variable is used to indicate that we've consumed a right
14728 // shift. I.e. we want to narrow the width of the load by skipping to load the
14729 // ShAmt least significant bits.
14730 unsigned ShAmt = 0;
14731 // A special case is when the least significant bits from the load are masked
14732 // away, but using an AND rather than a right shift. ShiftedOffset is used
14733 // to indicate that the narrowed load should be left-shifted ShiftedOffset
14734 // bits to get the result.
14735 unsigned ShiftedOffset = 0;
14736 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14737 // extended to VT.
14738 if (Opc == ISD::SIGN_EXTEND_INREG) {
14739 ExtType = ISD::SEXTLOAD;
14740 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14741 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14742 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14743 // value, or it may be shifting a higher subword, half or byte into the
14744 // lowest bits.
14745
14746 // Only handle shift with constant shift amount, and the shiftee must be a
14747 // load.
14748 auto *LN = dyn_cast<LoadSDNode>(N0);
14749 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14750 if (!N1C || !LN)
14751 return SDValue();
14752 // If the shift amount is larger than the memory type then we're not
14753 // accessing any of the loaded bytes.
14754 ShAmt = N1C->getZExtValue();
14755 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14756 if (MemoryWidth <= ShAmt)
14757 return SDValue();
14758 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14759 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14760 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14761 // If original load is a SEXTLOAD then we can't simply replace it by a
14762 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14763 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14764 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14765 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14766 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14767 LN->getExtensionType() != ExtType)
14768 return SDValue();
14769 } else if (Opc == ISD::AND) {
14770 // An AND with a constant mask is the same as a truncate + zero-extend.
14771 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14772 if (!AndC)
14773 return SDValue();
14774
14775 const APInt &Mask = AndC->getAPIntValue();
14776 unsigned ActiveBits = 0;
14777 if (Mask.isMask()) {
14778 ActiveBits = Mask.countr_one();
14779 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14780 ShiftedOffset = ShAmt;
14781 } else {
14782 return SDValue();
14783 }
14784
14785 ExtType = ISD::ZEXTLOAD;
14786 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14787 }
14788
14789 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14790 // a right shift. Here we redo some of those checks, to possibly adjust the
14791 // ExtVT even further based on "a masking AND". We could also end up here for
14792 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14793 // need to be done here as well.
14794 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14795 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14796 // Bail out when the SRL has more than one use. This is done for historical
14797 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14798 // check below? And maybe it could be non-profitable to do the transform
14799 // when the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14800 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14801 if (!SRL.hasOneUse())
14802 return SDValue();
14803
14804 // Only handle shift with constant shift amount, and the shiftee must be a
14805 // load.
14806 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14807 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14808 if (!SRL1C || !LN)
14809 return SDValue();
14810
14811 // If the shift amount is larger than the input type then we're not
14812 // accessing any of the loaded bytes. If the load was a zextload/extload
14813 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14814 ShAmt = SRL1C->getZExtValue();
14815 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14816 if (ShAmt >= MemoryWidth)
14817 return SDValue();
14818
14819 // Because a SRL must be assumed to *need* to zero-extend the high bits
14820 // (as opposed to anyext the high bits), we can't combine the zextload
14821 // lowering of SRL and an sextload.
14822 if (LN->getExtensionType() == ISD::SEXTLOAD)
14823 return SDValue();
14824
14825 // Avoid reading outside the memory accessed by the original load (could
14826 // happen if we only adjust the load base pointer by ShAmt). Instead we
14827 // try to narrow the load even further. The typical scenario here is:
14828 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14829 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14830 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14831 // Don't replace sextload by zextload.
14832 if (ExtType == ISD::SEXTLOAD)
14833 return SDValue();
14834 // Narrow the load.
14835 ExtType = ISD::ZEXTLOAD;
14836 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14837 }
14838
14839 // If the SRL is only used by a masking AND, we may be able to adjust
14840 // the ExtVT to make the AND redundant.
14841 SDNode *Mask = *(SRL->user_begin());
14842 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14843 isa<ConstantSDNode>(Mask->getOperand(1))) {
14844 unsigned Offset, ActiveBits;
14845 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14846 if (ShiftMask.isMask()) {
14847 EVT MaskedVT =
14848 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14849 // If the mask is smaller, recompute the type.
14850 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14851 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14852 ExtVT = MaskedVT;
14853 } else if (ExtType == ISD::ZEXTLOAD &&
14854 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14855 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14856 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14857 // If the mask is shifted we can use a narrower load and a shl to insert
14858 // the trailing zeros.
14859 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14860 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14861 ExtVT = MaskedVT;
14862 ShAmt = Offset + ShAmt;
14863 ShiftedOffset = Offset;
14864 }
14865 }
14866 }
14867
14868 N0 = SRL.getOperand(0);
14869 }
14870
14871 // If the load is shifted left (and the result isn't shifted back right), we
14872 // can fold a truncate through the shift. The typical scenario is that N
14873 // points at a TRUNCATE here so the attempted fold is:
14874 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
14875 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14876 unsigned ShLeftAmt = 0;
14877 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14878 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
14879 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14880 ShLeftAmt = N01->getZExtValue();
14881 N0 = N0.getOperand(0);
14882 }
14883 }
14884
14885 // If we haven't found a load, we can't narrow it.
14886 if (!isa<LoadSDNode>(N0))
14887 return SDValue();
14888
14889 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14890 // Reducing the width of a volatile load is illegal. For atomics, we may be
14891 // able to reduce the width provided we never widen again. (see D66309)
14892 if (!LN0->isSimple() ||
14893 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14894 return SDValue();
14895
14896 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14897 unsigned LVTStoreBits =
14898 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14899 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14900 return LVTStoreBits - EVTStoreBits - ShAmt;
14901 };
14902
14903 // We need to adjust the pointer to the load by ShAmt bits in order to load
14904 // the correct bytes.
14905 unsigned PtrAdjustmentInBits =
14906 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14907
14908 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14909 SDLoc DL(LN0);
14910 // The original load itself didn't wrap, so an offset within it doesn't.
14911 SDValue NewPtr =
14914 AddToWorklist(NewPtr.getNode());
14915
14916 SDValue Load;
14917 if (ExtType == ISD::NON_EXTLOAD)
14918 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14919 LN0->getPointerInfo().getWithOffset(PtrOff),
14920 LN0->getOriginalAlign(),
14921 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14922 else
14923 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14924 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14925 LN0->getOriginalAlign(),
14926 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14927
14928 // Replace the old load's chain with the new load's chain.
14929 WorklistRemover DeadNodes(*this);
14930 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14931
14932 // Shift the result left, if we've swallowed a left shift.
14933 SDValue Result = Load;
14934 if (ShLeftAmt != 0) {
14935 // If the shift amount is as large as the result size (but, presumably,
14936 // no larger than the source) then the useful bits of the result are
14937 // zero; we can't simply return the shortened shift, because the result
14938 // of that operation is undefined.
14939 if (ShLeftAmt >= VT.getScalarSizeInBits())
14940 Result = DAG.getConstant(0, DL, VT);
14941 else
14942 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14943 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14944 }
14945
14946 if (ShiftedOffset != 0) {
14947 // We're using a shifted mask, so the load now has an offset. This means
14948 // that the data has been loaded into lower bytes than it would have been
14949 // before, so we need to shl the loaded data into the correct position in the
14950 // register.
14951 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14952 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14953 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14954 }
14955
14956 // Return the new loaded value.
14957 return Result;
14958}
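// For example (illustrative): narrowing a 32-bit load to an 8-bit zextload
// with ShAmt == 16 adds 16/8 == 2 bytes to the pointer on a little-endian
// target, but only (32 - 8 - 16)/8 == 1 byte on a big-endian target, per
// AdjustBigEndianShift above.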
14959
14960SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14961 SDValue N0 = N->getOperand(0);
14962 SDValue N1 = N->getOperand(1);
14963 EVT VT = N->getValueType(0);
14964 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14965 unsigned VTBits = VT.getScalarSizeInBits();
14966 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14967 SDLoc DL(N);
14968
14969 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14970 if (N0.isUndef())
14971 return DAG.getConstant(0, DL, VT);
14972
14973 // fold (sext_in_reg c1) -> c1
14974 if (SDValue C =
14975 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
14976 return C;
14977
14978 // If the input is already sign extended, just drop the extension.
14979 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14980 return N0;
14981
14982 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14983 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14984 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14985 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
14986
14987 // fold (sext_in_reg (sext x)) -> (sext x)
14988 // fold (sext_in_reg (aext x)) -> (sext x)
14989 // if x is small enough or if we know that x has more than 1 sign bit and the
14990 // sign_extend_inreg is extending from one of them.
14991 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14992 SDValue N00 = N0.getOperand(0);
14993 unsigned N00Bits = N00.getScalarValueSizeInBits();
14994 if ((N00Bits <= ExtVTBits ||
14995 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14996 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14997 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
14998 }
14999
15000 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15001 // if x is small enough or if we know that x has more than 1 sign bit and the
15002 // sign_extend_inreg is extending from one of them.
15003 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15004 SDValue N00 = N0.getOperand(0);
15005 unsigned N00Bits = N00.getScalarValueSizeInBits();
15006 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
15007 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
15008 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15009 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
15010 if ((N00Bits == ExtVTBits ||
15011 (!IsZext && (N00Bits < ExtVTBits ||
15012 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15013 (!LegalOperations ||
15014 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15015 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15016 }
15017
15018 // fold (sext_in_reg (zext x)) -> (sext x)
15019 // iff we are extending the source sign bit.
15020 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15021 SDValue N00 = N0.getOperand(0);
15022 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15023 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15024 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15025 }
15026
15027 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15028 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15029 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15030
15031 // fold operands of sext_in_reg based on knowledge that the top bits are not
15032 // demanded.
15033 if (SimplifyDemandedBits(SDValue(N, 0)))
15034 return SDValue(N, 0);
15035
15036 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15037 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15038 if (SDValue NarrowLoad = reduceLoadWidth(N))
15039 return NarrowLoad;
15040
15041 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15042 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15043 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15044 if (N0.getOpcode() == ISD::SRL) {
15045 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15046 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15047 // We can turn this into an SRA iff the input to the SRL is already sign
15048 // extended enough.
15049 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15050 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15051 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15052 N0.getOperand(1));
15053 }
15054 }
15055
15056 // fold (sext_inreg (extload x)) -> (sextload x)
15057 // If sextload is not supported by target, we can only do the combine when
15058 // load has one use. Doing otherwise can block folding the extload with other
15059 // extends that the target does support.
15060 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15061 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15062 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15063 N0.hasOneUse()) ||
15064 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15065 auto *LN0 = cast<LoadSDNode>(N0);
15066 SDValue ExtLoad =
15067 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15068 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15069 CombineTo(N, ExtLoad);
15070 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15071 AddToWorklist(ExtLoad.getNode());
15072 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15073 }
15074
15075 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15076 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15077 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15078 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15079 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15080 auto *LN0 = cast<LoadSDNode>(N0);
15081 SDValue ExtLoad =
15082 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15083 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15084 CombineTo(N, ExtLoad);
15085 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15086 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15087 }
15088
15089 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15090 // ignore it if the masked load is already sign extended
15091 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15092 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15093 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15094 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15095 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15096 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15097 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15098 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15099 CombineTo(N, ExtMaskedLoad);
15100 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15101 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15102 }
15103 }
15104
15105 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15106 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15107 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15109 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15110 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15111
15112 SDValue ExtLoad = DAG.getMaskedGather(
15113 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15114 GN0->getIndexType(), ISD::SEXTLOAD);
15115
15116 CombineTo(N, ExtLoad);
15117 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15118 AddToWorklist(ExtLoad.getNode());
15119 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15120 }
15121 }
15122
15123 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15124 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15125 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15126 N0.getOperand(1), false))
15127 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15128 }
15129
15130 // Fold (iM_signext_inreg
15131 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15132 // from iN)
15133 // -> (extract_subvector (signext iN_v to iM))
15134 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15135 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15136 SDValue InnerExt = N0.getOperand(0);
15137 EVT InnerExtVT = InnerExt->getValueType(0);
15138 SDValue Extendee = InnerExt->getOperand(0);
15139
15140 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15141 (!LegalOperations ||
15142 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15143 SDValue SignExtExtendee =
15144 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15145 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15146 N0.getOperand(1));
15147 }
15148 }
15149
15150 return SDValue();
15151}
15152
15153static SDValue foldExtendVectorInregToExtendOfSubvector(
15154 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15155 bool LegalOperations) {
15156 unsigned InregOpcode = N->getOpcode();
15157 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15158
15159 SDValue Src = N->getOperand(0);
15160 EVT VT = N->getValueType(0);
15161 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15162 Src.getValueType().getVectorElementType(),
15163 VT.getVectorElementCount());
15164
15165 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15166 "Expected EXTEND_VECTOR_INREG dag node in input!");
15167
15168 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15169 // FIXME: one-use check may be overly restrictive
15170 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15171 return SDValue();
15172
15173 // Profitability check: we must be extending exactly one of its operands.
15174 // FIXME: this is probably overly restrictive.
15175 Src = Src.getOperand(0);
15176 if (Src.getValueType() != SrcVT)
15177 return SDValue();
15178
15179 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15180 return SDValue();
15181
15182 return DAG.getNode(Opcode, DL, VT, Src);
15183}
15184
15185SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15186 SDValue N0 = N->getOperand(0);
15187 EVT VT = N->getValueType(0);
15188 SDLoc DL(N);
15189
15190 if (N0.isUndef()) {
15191 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15192 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15193 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15194 ? DAG.getUNDEF(VT)
15195 : DAG.getConstant(0, DL, VT);
15196 }
15197
15198 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15199 return Res;
15200
15201 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15202 return SDValue(N, 0);
15203
15204 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15205 LegalOperations))
15206 return R;
15207
15208 return SDValue();
15209}
15210
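// TRUNCATE_USAT_U truncates with unsigned saturation. The single combine below
// folds (truncate_usat_u (fp_to_uint x)) into (fp_to_uint_sat x) when the
// saturating FP-to-integer conversion is acceptable for these types.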
15211SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15212 EVT VT = N->getValueType(0);
15213 SDValue N0 = N->getOperand(0);
15214
15215 SDValue FPVal;
15216 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15217 TLI.shouldConvertFpToSat(
15218 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15219 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15220 DAG.getValueType(VT.getScalarType()));
15221
15222 return SDValue();
15223}
15224
15225/// Detect patterns of truncation with unsigned saturation:
15226///
15227/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15228/// Return the source value x to be truncated or SDValue() if the pattern was
15229/// not matched.
15230///
15231static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15232 unsigned NumDstBits = VT.getScalarSizeInBits();
15233 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15234 // Saturation with truncation. We truncate from InVT to VT.
15235 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15236
15237 SDValue Min;
15238 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15239 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15240 return Min;
15241
15242 return SDValue();
15243}
15244
15245/// Detect patterns of truncation with signed saturation:
15246/// (truncate (smin (smax (x, signed_min_of_dest_type),
15247/// signed_max_of_dest_type)) to dest_type)
15248/// or:
15249/// (truncate (smax (smin (x, signed_max_of_dest_type),
15250/// signed_min_of_dest_type)) to dest_type).
15251///
15252/// Return the source value to be truncated or SDValue() if the pattern was not
15253/// matched.
15254static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15255 unsigned NumDstBits = VT.getScalarSizeInBits();
15256 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15257 // Saturation with truncation. We truncate from InVT to VT.
15258 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15259
15260 SDValue Val;
15261 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15262 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15263
15264 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15265 m_SpecificInt(SignedMax))))
15266 return Val;
15267
15268 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15269 m_SpecificInt(SignedMin))))
15270 return Val;
15271
15272 return SDValue();
15273}
15274
15275/// Detect patterns of truncation with unsigned saturation:
15276static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15277 const SDLoc &DL) {
15278 unsigned NumDstBits = VT.getScalarSizeInBits();
15279 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15280 // Saturation with truncation. We truncate from InVT to VT.
15281 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15282
15283 SDValue Val;
15284 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15285 // Min == 0, Max is unsigned max of destination type.
15286 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15287 m_Zero())))
15288 return Val;
15289
15290 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15291 m_SpecificInt(UnsignedMax))))
15292 return Val;
15293
15294 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15295 m_SpecificInt(UnsignedMax))))
15296 return Val;
15297
15298 return SDValue();
15299}
15300
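/// Given a truncate whose source matches one of the min/max clamp patterns
/// detected above, try to emit a single saturating truncate node
/// (TRUNCATE_SSAT_S, TRUNCATE_SSAT_U or TRUNCATE_USAT_U) when the target
/// supports that operation on the source type.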
15301static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15302 SDLoc &DL, const TargetLowering &TLI,
15303 SelectionDAG &DAG) {
15304 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15305 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15306 TLI.isTypeDesirableForOp(Opc, VT));
15307 };
15308
15309 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15310 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15311 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15312 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15313 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15314 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15315 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15316 } else if (Src.getOpcode() == ISD::UMIN) {
15317 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15318 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15319 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15320 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15321 if (SDValue USatVal = detectUSatUPattern(Src, VT))
15322 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15323 }
15324
15325 return SDValue();
15326}
15327
15328SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
15329 SDValue N0 = N->getOperand(0);
15330 EVT VT = N->getValueType(0);
15331 EVT SrcVT = N0.getValueType();
15332 bool isLE = DAG.getDataLayout().isLittleEndian();
15333 SDLoc DL(N);
15334
15335 // trunc(undef) = undef
15336 if (N0.isUndef())
15337 return DAG.getUNDEF(VT);
15338
15339 // fold (truncate (truncate x)) -> (truncate x)
15340 if (N0.getOpcode() == ISD::TRUNCATE)
15341 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15342
15343 // fold saturated truncate
15344 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
15345 return SaturatedTR;
15346
15347 // fold (truncate c1) -> c1
15348 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
15349 return C;
15350
15351 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
15352 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
15353 N0.getOpcode() == ISD::SIGN_EXTEND ||
15354 N0.getOpcode() == ISD::ANY_EXTEND) {
15355 // if the source is smaller than the dest, we still need an extend.
15356 if (N0.getOperand(0).getValueType().bitsLT(VT))
15357 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15358 // if the source is larger than the dest, then we just need the truncate.
15359 if (N0.getOperand(0).getValueType().bitsGT(VT))
15360 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15361 // if the source and dest are the same type, we can drop both the extend
15362 // and the truncate.
15363 return N0.getOperand(0);
15364 }
15365
15366 // Try to narrow a truncate-of-sext_in_reg to the destination type:
15367 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
15368 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15369 N0.hasOneUse()) {
15370 SDValue X = N0.getOperand(0);
15371 SDValue ExtVal = N0.getOperand(1);
15372 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15373 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
15374 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
15375 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
15376 }
15377 }
15378
15379 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
15380 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
15381 return SDValue();
15382
15383 // Fold extract-and-trunc into a narrow extract. For example:
15384 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
15385 // i32 y = TRUNCATE(i64 x)
15386 // -- becomes --
15387 // v16i8 b = BITCAST (v2i64 val)
15388 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
15389 //
15390 // Note: We only run this optimization after type legalization (which often
15391 // creates this pattern) and before operation legalization after which
15392 // we need to be more careful about the vector instructions that we generate.
15393 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
15394 N0->hasOneUse()) {
15395 EVT TrTy = N->getValueType(0);
15396 SDValue Src = N0;
15397
15398 // Check for cases where we shift down an upper element before truncation.
15399 int EltOffset = 0;
15400 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15401 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
15402 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
15403 Src = Src.getOperand(0);
15404 EltOffset = *ShAmt / TrTy.getSizeInBits();
15405 }
15406 }
15407 }
15408
15409 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15410 EVT VecTy = Src.getOperand(0).getValueType();
15411 EVT ExTy = Src.getValueType();
15412
15413 auto EltCnt = VecTy.getVectorElementCount();
15414 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
15415 auto NewEltCnt = EltCnt * SizeRatio;
15416
15417 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
15418 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
15419
15420 SDValue EltNo = Src->getOperand(1);
15421 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
15422 int Elt = EltNo->getAsZExtVal();
15423 int Index = isLE ? (Elt * SizeRatio + EltOffset)
15424 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15425 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
15426 DAG.getBitcast(NVT, Src.getOperand(0)),
15427 DAG.getVectorIdxConstant(Index, DL));
15428 }
15429 }
15430 }
15431
15432 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15433 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
15434 TLI.isTruncateFree(SrcVT, VT)) {
15435 if (!LegalOperations ||
15436 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
15437 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
15438 SDLoc SL(N0);
15439 SDValue Cond = N0.getOperand(0);
15440 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15441 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15442 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15443 }
15444 }
15445
15446 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15447 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15448 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15449 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15450 SDValue Amt = N0.getOperand(1);
15451 KnownBits Known = DAG.computeKnownBits(Amt);
15452 unsigned Size = VT.getScalarSizeInBits();
15453 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15454 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15455 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15456 if (AmtVT != Amt.getValueType()) {
15457 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15458 AddToWorklist(Amt.getNode());
15459 }
15460 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15461 }
15462 }
15463
15464 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15465 return V;
15466
15467 if (SDValue ABD = foldABSToABD(N, DL))
15468 return ABD;
15469
15470 // Attempt to pre-truncate BUILD_VECTOR sources.
15471 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15472 N0.hasOneUse() &&
15473 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15474 // Avoid creating illegal types if running after type legalizer.
15475 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15476 EVT SVT = VT.getScalarType();
15477 SmallVector<SDValue, 8> TruncOps;
15478 for (const SDValue &Op : N0->op_values()) {
15479 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15480 TruncOps.push_back(TruncOp);
15481 }
15482 return DAG.getBuildVector(VT, DL, TruncOps);
15483 }
15484
15485 // trunc (splat_vector x) -> splat_vector (trunc x)
15486 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15487 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15488 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15489 EVT SVT = VT.getScalarType();
15490 return DAG.getSplatVector(
15491 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15492 }
15493
15494 // Fold a series of buildvector, bitcast, and truncate if possible.
15495 // For example fold
15496 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15497 // (2xi32 (buildvector x, y)).
15498 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15499 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15500 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15501 N0.getOperand(0).hasOneUse()) {
15502 SDValue BuildVect = N0.getOperand(0);
15503 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15504 EVT TruncVecEltTy = VT.getVectorElementType();
15505
15506 // Check that the element types match.
15507 if (BuildVectEltTy == TruncVecEltTy) {
15508 // Now we only need to compute the offset of the truncated elements.
15509 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15510 unsigned TruncVecNumElts = VT.getVectorNumElements();
15511 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15512 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15513
15514 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15515 "Invalid number of elements");
15516
15517 SmallVector<SDValue, 8> Opnds;
15518 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15519 i += TruncEltOffset)
15520 Opnds.push_back(BuildVect.getOperand(i));
15521
15522 return DAG.getBuildVector(VT, DL, Opnds);
15523 }
15524 }
15525
15526 // fold (truncate (load x)) -> (smaller load x)
15527 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15528 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15529 if (SDValue Reduced = reduceLoadWidth(N))
15530 return Reduced;
15531
15532 // Handle the case where the truncated result is at least as wide as the
15533 // loaded type.
15534 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15535 auto *LN0 = cast<LoadSDNode>(N0);
15536 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15537 SDValue NewLoad = DAG.getExtLoad(
15538 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15539 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15540 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15541 return NewLoad;
15542 }
15543 }
15544 }
15545
15546 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15547 // where ... are all 'undef'.
15548 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15549 SmallVector<EVT, 8> VTs;
15550 SDValue V;
15551 unsigned Idx = 0;
15552 unsigned NumDefs = 0;
15553
15554 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15555 SDValue X = N0.getOperand(i);
15556 if (!X.isUndef()) {
15557 V = X;
15558 Idx = i;
15559 NumDefs++;
15560 }
15561 // Stop if more than one member is non-undef.
15562 if (NumDefs > 1)
15563 break;
15564
15565 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15566 VT.getVectorElementType(),
15567 X.getValueType().getVectorElementCount()));
15568 }
15569
15570 if (NumDefs == 0)
15571 return DAG.getUNDEF(VT);
15572
15573 if (NumDefs == 1) {
15574 assert(V.getNode() && "The single defined operand is empty!");
15575 SmallVector<SDValue, 8> Opnds;
15576 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15577 if (i != Idx) {
15578 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15579 continue;
15580 }
15581 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15582 AddToWorklist(NV.getNode());
15583 Opnds.push_back(NV);
15584 }
15585 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15586 }
15587 }
15588
15589 // Fold truncate of a bitcast of a vector to an extract of the low vector
15590 // element.
15591 //
15592 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15593 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15594 SDValue VecSrc = N0.getOperand(0);
15595 EVT VecSrcVT = VecSrc.getValueType();
15596 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15597 (!LegalOperations ||
15598 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15599 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15600 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15601 DAG.getVectorIdxConstant(Idx, DL));
15602 }
15603 }
15604
15605 // Simplify the operands using demanded-bits information.
15606 if (SimplifyDemandedBits(SDValue(N, 0)))
15607 return SDValue(N, 0);
15608
15609 // fold (truncate (extract_subvector(ext x))) ->
15610 // (extract_subvector x)
15611 // TODO: This can be generalized to cover cases where the truncate and extract
15612 // do not fully cancel each other out.
15613 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15614 SDValue N00 = N0.getOperand(0);
15615 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15616 N00.getOpcode() == ISD::ZERO_EXTEND ||
15617 N00.getOpcode() == ISD::ANY_EXTEND) {
15618 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15619 VT.getVectorElementType())
15620 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15621 N00.getOperand(0), N0.getOperand(1));
15622 }
15623 }
15624
15625 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15626 return NewVSel;
15627
15628 // Narrow a suitable binary operation with a non-opaque constant operand by
15629 // moving it ahead of the truncate. This is limited to pre-legalization
15630 // because targets may prefer a wider type during later combines and invert
15631 // this transform.
15632 switch (N0.getOpcode()) {
15633 case ISD::ADD:
15634 case ISD::SUB:
15635 case ISD::MUL:
15636 case ISD::AND:
15637 case ISD::OR:
15638 case ISD::XOR:
15639 if (!LegalOperations && N0.hasOneUse() &&
15640 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15641 isConstantOrConstantVector(N0.getOperand(1), true))) {
15642 // TODO: We already restricted this to pre-legalization, but for vectors
15643 // we are extra cautious to not create an unsupported operation.
15644 // Target-specific changes are likely needed to avoid regressions here.
15645 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15646 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15647 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15648 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15649 }
15650 }
15651 break;
15652 case ISD::ADDE:
15653 case ISD::UADDO_CARRY:
15654 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15655 // (trunc uaddo_carry(X, Y, Carry)) ->
15656 // (uaddo_carry trunc(X), trunc(Y), Carry)
15657 // When the adde's carry is not used.
15658 // We only do this for uaddo_carry before operation legalization.
15659 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15660 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15661 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15662 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15663 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15664 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15665 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15666 }
15667 break;
15668 case ISD::USUBSAT:
15669 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15670 // enough to know that the upper bits are zero, we must also ensure that
15671 // we don't introduce an extra truncate.
15672 if (!LegalOperations && N0.hasOneUse() &&
15673 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15674 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15675 VT.getScalarSizeInBits() &&
15676 hasOperation(N0.getOpcode(), VT)) {
15677 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15678 DAG, DL);
15679 }
15680 break;
15681 }
15682
15683 return SDValue();
15684}
15685
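// Return the node that defines operand i of a BUILD_PAIR, looking through a
// MERGE_VALUES wrapper to the underlying value if necessary.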
15686static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15687 SDValue Elt = N->getOperand(i);
15688 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15689 return Elt.getNode();
15690 return Elt.getOperand(Elt.getResNo()).getNode();
15691}
15692
15693/// build_pair (load, load) -> load
15694/// if load locations are consecutive.
15695SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15696 assert(N->getOpcode() == ISD::BUILD_PAIR);
15697
15698 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15699 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15700
15701 // A BUILD_PAIR always has the least significant part in elt 0 and the
15702 // most significant part in elt 1. So when combining into one large load, we
15703 // need to consider the endianness.
15704 if (DAG.getDataLayout().isBigEndian())
15705 std::swap(LD1, LD2);
15706
15707 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15708 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15709 LD1->getAddressSpace() != LD2->getAddressSpace())
15710 return SDValue();
15711
15712 unsigned LD1Fast = 0;
15713 EVT LD1VT = LD1->getValueType(0);
15714 unsigned LD1Bytes = LD1VT.getStoreSize();
15715 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15716 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15717 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15718 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15719 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15720 LD1->getPointerInfo(), LD1->getAlign());
15721
15722 return SDValue();
15723}
15724
15725static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15726 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15727 // and Lo parts; on big-endian machines it doesn't.
15728 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15729}
15730
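/// If an integer logic op (and/or/xor) with a sign-bit mask constant is
/// immediately bitcast to a same-sized FP type, rebuild it as fabs, fneg, or
/// fneg(fabs) of the FP value instead.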
15731SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15732 const TargetLowering &TLI) {
15733 // If this is not a bitcast to an FP type or if the target doesn't have
15734 // IEEE754-compliant FP logic, we're done.
15735 EVT VT = N->getValueType(0);
15736 SDValue N0 = N->getOperand(0);
15737 EVT SourceVT = N0.getValueType();
15738
15739 if (!VT.isFloatingPoint())
15740 return SDValue();
15741
15742 // TODO: Handle cases where the integer constant is a different scalar
15743 // bitwidth to the FP.
15744 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15745 return SDValue();
15746
15747 unsigned FPOpcode;
15748 APInt SignMask;
15749 switch (N0.getOpcode()) {
15750 case ISD::AND:
15751 FPOpcode = ISD::FABS;
15752 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15753 break;
15754 case ISD::XOR:
15755 FPOpcode = ISD::FNEG;
15756 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15757 break;
15758 case ISD::OR:
15759 FPOpcode = ISD::FABS;
15760 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15761 break;
15762 default:
15763 return SDValue();
15764 }
15765
15766 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15767 return SDValue();
15768
15769 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15770 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15771 // removing this would require more changes.
15772 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15773 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
15774 return true;
15775
15776 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15777 };
15778
15779 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15780 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15781 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15782 // fneg (fabs X)
15783 SDValue LogicOp0 = N0.getOperand(0);
15784 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15785 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15786 IsBitCastOrFree(LogicOp0, VT)) {
15787 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15788 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15789 NumFPLogicOpsConv++;
15790 if (N0.getOpcode() == ISD::OR)
15791 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15792 return FPOp;
15793 }
15794
15795 return SDValue();
15796}
15797
15798SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15799 SDValue N0 = N->getOperand(0);
15800 EVT VT = N->getValueType(0);
15801
15802 if (N0.isUndef())
15803 return DAG.getUNDEF(VT);
15804
15805 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15806 // Only do this before legalize types, unless both types are integer and the
15807 // scalar type is legal. Only do this before legalize ops, since the target
15808 // maybe depending on the bitcast.
15809 // First check to see if this is all constant.
15810 // TODO: Support FP bitcasts after legalize types.
15811 if (VT.isVector() &&
15812 (!LegalTypes ||
15813 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15814 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15815 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15816 cast<BuildVectorSDNode>(N0)->isConstant())
15817 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15818 VT.getVectorElementType());
15819
15820 // If the input is a constant, let getNode fold it.
15821 if (isIntOrFPConstant(N0)) {
15822 // If we can't allow illegal operations, we need to check that this is just
15823 // an fp -> int or int -> fp conversion and that the resulting operation will
15824 // be legal.
15825 if (!LegalOperations ||
15826 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15827 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15828 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15829 TLI.isOperationLegal(ISD::Constant, VT))) {
15830 SDValue C = DAG.getBitcast(VT, N0);
15831 if (C.getNode() != N)
15832 return C;
15833 }
15834 }
15835
15836 // (conv (conv x, t1), t2) -> (conv x, t2)
15837 if (N0.getOpcode() == ISD::BITCAST)
15838 return DAG.getBitcast(VT, N0.getOperand(0));
15839
15840 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15841 // iff the current bitwise logicop type isn't legal
15842 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15843 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15844 auto IsFreeBitcast = [VT](SDValue V) {
15845 return (V.getOpcode() == ISD::BITCAST &&
15846 V.getOperand(0).getValueType() == VT) ||
15847 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15848 V->hasOneUse());
15849 };
15850 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15851 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15852 DAG.getBitcast(VT, N0.getOperand(0)),
15853 DAG.getBitcast(VT, N0.getOperand(1)));
15854 }
15855
15856 // fold (conv (load x)) -> (load (conv*)x)
15857 // If the resultant load doesn't need a higher alignment than the original!
15858 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15859 // Do not remove the cast if the types differ in endian layout.
15860 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15861 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15862 // If the load is volatile, we only want to change the load type if the
15863 // resulting load is legal. Otherwise we might increase the number of
15864 // memory accesses. We don't care if the original type was legal or not
15865 // as we assume software couldn't rely on the number of accesses of an
15866 // illegal type.
15867 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15868 TLI.isOperationLegal(ISD::LOAD, VT))) {
15869 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15870
15871 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15872 *LN0->getMemOperand())) {
15873 SDValue Load =
15874 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15875 LN0->getMemOperand());
15876 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15877 return Load;
15878 }
15879 }
15880
15881 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15882 return V;
15883
15884 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15885 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15886 //
15887 // For ppc_fp128:
15888 // fold (bitcast (fneg x)) ->
15889 // flipbit = signbit
15890 // (xor (bitcast x) (build_pair flipbit, flipbit))
15891 //
15892 // fold (bitcast (fabs x)) ->
15893 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15894 // (xor (bitcast x) (build_pair flipbit, flipbit))
15895 // This often reduces constant pool loads.
15896 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15897 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15898 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15899 !N0.getValueType().isVector()) {
15900 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15901 AddToWorklist(NewConv.getNode());
15902
15903 SDLoc DL(N);
15904 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15905 assert(VT.getSizeInBits() == 128);
15906 SDValue SignBit = DAG.getConstant(
15907 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15908 SDValue FlipBit;
15909 if (N0.getOpcode() == ISD::FNEG) {
15910 FlipBit = SignBit;
15911 AddToWorklist(FlipBit.getNode());
15912 } else {
15913 assert(N0.getOpcode() == ISD::FABS);
15914 SDValue Hi =
15915 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15916 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15917 SDLoc(NewConv)));
15918 AddToWorklist(Hi.getNode());
15919 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15920 AddToWorklist(FlipBit.getNode());
15921 }
15922 SDValue FlipBits =
15923 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15924 AddToWorklist(FlipBits.getNode());
15925 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15926 }
15927 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15928 if (N0.getOpcode() == ISD::FNEG)
15929 return DAG.getNode(ISD::XOR, DL, VT,
15930 NewConv, DAG.getConstant(SignBit, DL, VT));
15931 assert(N0.getOpcode() == ISD::FABS);
15932 return DAG.getNode(ISD::AND, DL, VT,
15933 NewConv, DAG.getConstant(~SignBit, DL, VT));
15934 }
15935
15936 // fold (bitconvert (fcopysign cst, x)) ->
15937 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15938 // Note that we don't handle (copysign x, cst) because this can always be
15939 // folded to an fneg or fabs.
15940 //
15941 // For ppc_fp128:
15942 // fold (bitcast (fcopysign cst, x)) ->
15943 // flipbit = (and (extract_element
15944 // (xor (bitcast cst), (bitcast x)), 0),
15945 // signbit)
15946 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15947 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15948 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15949 !VT.isVector()) {
15950 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15951 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15952 if (isTypeLegal(IntXVT)) {
15953 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15954 AddToWorklist(X.getNode());
15955
15956 // If X has a different width than the result/lhs, sext it or truncate it.
15957 unsigned VTWidth = VT.getSizeInBits();
15958 if (OrigXWidth < VTWidth) {
15959 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15960 AddToWorklist(X.getNode());
15961 } else if (OrigXWidth > VTWidth) {
15962 // To get the sign bit in the right place, we have to shift it right
15963 // before truncating.
15964 SDLoc DL(X);
15965 X = DAG.getNode(ISD::SRL, DL,
15966 X.getValueType(), X,
15967 DAG.getConstant(OrigXWidth-VTWidth, DL,
15968 X.getValueType()));
15969 AddToWorklist(X.getNode());
15970 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15971 AddToWorklist(X.getNode());
15972 }
15973
15974 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15975 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15976 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15977 AddToWorklist(Cst.getNode());
15978 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15979 AddToWorklist(X.getNode());
15980 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15981 AddToWorklist(XorResult.getNode());
15982 SDValue XorResult64 = DAG.getNode(
15983 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15984 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15985 SDLoc(XorResult)));
15986 AddToWorklist(XorResult64.getNode());
15987 SDValue FlipBit =
15988 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15989 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15990 AddToWorklist(FlipBit.getNode());
15991 SDValue FlipBits =
15992 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15993 AddToWorklist(FlipBits.getNode());
15994 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15995 }
15996 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15997 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15998 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15999 AddToWorklist(X.getNode());
16000
16001 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16002 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16003 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16004 AddToWorklist(Cst.getNode());
16005
16006 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16007 }
16008 }
16009
16010 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16011 if (N0.getOpcode() == ISD::BUILD_PAIR)
16012 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16013 return CombineLD;
16014
16015 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16016 // => int_vt (any_extend elt_vt:x)
16017 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16018 SDValue SrcScalar = N0.getOperand(0);
16019 if (SrcScalar.getValueType().isScalarInteger())
16020 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16021 }
16022
16023 // Remove double bitcasts from shuffles - this is often a legacy of
16024 // XformToShuffleWithZero being used to combine bitmaskings (of
16025 // float vectors bitcast to integer vectors) into shuffles.
16026 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16027 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16028 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16029 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16030 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16031 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16032
16033 // If operands are a bitcast, peek through if it casts the original VT.
16034 // If operands are a constant, just bitcast back to original VT.
16035 auto PeekThroughBitcast = [&](SDValue Op) {
16036 if (Op.getOpcode() == ISD::BITCAST &&
16037 Op.getOperand(0).getValueType() == VT)
16038 return SDValue(Op.getOperand(0));
16039 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16040 return DAG.getBitcast(VT, Op);
16041 return SDValue();
16042 };
16043
16044 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16045 // the result type of this bitcast. This would eliminate at least one
16046 // bitcast. See the transform in InstCombine.
16047 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16048 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16049 if (!(SV0 && SV1))
16050 return SDValue();
16051
16052 int MaskScale =
16053 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16054 SmallVector<int, 8> NewMask;
16055 for (int M : SVN->getMask())
16056 for (int i = 0; i != MaskScale; ++i)
16057 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16058
16059 SDValue LegalShuffle =
16060 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16061 if (LegalShuffle)
16062 return LegalShuffle;
16063 }
16064
16065 return SDValue();
16066}
16067
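// A BUILD_PAIR is only combined by trying to merge its two consecutive-load
// operands into a single wider load.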
16068SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16069 EVT VT = N->getValueType(0);
16070 return CombineConsecutiveLoads(N, VT);
16071}
16072
16073SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16074 SDValue N0 = N->getOperand(0);
16075
16076 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16077 return N0;
16078
16079 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16080 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16081 // example https://reviews.llvm.org/D136529#4120959.
16082 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16083 return SDValue();
16084
16085 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16086 // Try to push freeze through instructions that propagate but don't produce
16087 // poison as far as possible. If an operand of freeze meets three
16088 // conditions: 1) one-use, 2) does not produce poison, and 3) has all but one
16089 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16090 // the freeze through to the operands that are not guaranteed non-poison.
16091 // NOTE: we will strip poison-generating flags, so ignore them here.
16092 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16093 /*ConsiderFlags*/ false) ||
16094 N0->getNumValues() != 1 || !N0->hasOneUse())
16095 return SDValue();
16096
16097 bool AllowMultipleMaybePoisonOperands =
16098 N0.getOpcode() == ISD::SELECT_CC ||
16099 N0.getOpcode() == ISD::SETCC ||
16100 N0.getOpcode() == ISD::BUILD_VECTOR ||
16101 N0.getOpcode() == ISD::BUILD_PAIR ||
16104
16105 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16106 // ones" or "constant" into something that depends on FrozenUndef. We can
16107 // instead pick undef values to keep those properties, while at the same time
16108 // folding away the freeze.
16109 // If we implement a more general solution for folding away freeze(undef) in
16110 // the future, then this special handling can be removed.
16111 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16112 SDLoc DL(N0);
16113 EVT VT = N0.getValueType();
16114 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16115 return DAG.getAllOnesConstant(DL, VT);
16116 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16117 SmallVector<SDValue, 8> NewVecC;
16118 for (const SDValue &Op : N0->op_values())
16119 NewVecC.push_back(
16120 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16121 return DAG.getBuildVector(VT, DL, NewVecC);
16122 }
16123 }
16124
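// Collect the operands that are not known to be free of undef/poison; these
// are the ones that will be frozen individually below.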
16125 SmallSet<SDValue, 8> MaybePoisonOperands;
16126 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16127 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16128 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
16129 /*Depth*/ 1))
16130 continue;
16131 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16132 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16133 if (IsNewMaybePoisonOperand)
16134 MaybePoisonOperandNumbers.push_back(OpNo);
16135 if (!HadMaybePoisonOperands)
16136 continue;
16137 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16138 // Multiple maybe-poison ops when not allowed - bail out.
16139 return SDValue();
16140 }
16141 }
16142 // NOTE: the whole op may not be guaranteed to not be undef or poison because
16143 // it could create undef or poison due to its poison-generating flags.
16144 // So not finding any maybe-poison operands is fine.
16145
16146 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16147 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16148 // operands via the operand numbers. The typical scenario is that we have
16149 // something like this
16150 // t262: i32 = freeze t181
16151 // t150: i32 = ctlz_zero_undef t262
16152 // t184: i32 = ctlz_zero_undef t181
16153 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16154 // When freezing the t181 operand we get t262 back, and then the
16155 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16156 // also recursively replace t184 by t150.
16157 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16158 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16159 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
16160 continue;
16161 // First, freeze each offending operand.
16162 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16163 // Then, change all other uses of unfrozen operand to use frozen operand.
16164 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16165 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16166 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16167 // But, that also updated the use in the freeze we just created, thus
16168 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16169 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16170 MaybePoisonOperand);
16171 }
16172 }
16173
16174 // This node has been merged with another.
16175 if (N->getOpcode() == ISD::DELETED_NODE)
16176 return SDValue(N, 0);
16177
16178 // The whole node may have been updated, so the value we were holding
16179 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16180 N0 = N->getOperand(0);
16181
16182 // Finally, recreate the node; its operands were updated to use
16183 // frozen operands, so we just need to use its "original" operands.
16184 SmallVector<SDValue> Ops(N0->ops());
16185 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
16186 for (SDValue &Op : Ops) {
16187 if (Op.getOpcode() == ISD::UNDEF)
16188 Op = DAG.getFreeze(Op);
16189 }
16190
16191 SDValue R;
16192 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
16193 // Special case handling for ShuffleVectorSDNode nodes.
16194 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
16195 SVN->getMask());
16196 } else {
16197 // NOTE: this strips poison generating flags.
16198 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16199 }
16200 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
16201 "Can't create node that may be undef/poison!");
16202 return R;
16203}
16204
16205/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
16206/// operands. DstEltVT indicates the destination element value type.
16207SDValue DAGCombiner::
16208ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
16209 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
16210
16211 // If this is already the right type, we're done.
16212 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
16213
16214 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
16215 unsigned DstBitSize = DstEltVT.getSizeInBits();
16216
16217 // If this is a conversion of N elements of one type to N elements of another
16218 // type, convert each element. This handles FP<->INT cases.
16219 if (SrcBitSize == DstBitSize) {
16220 SmallVector<SDValue, 8> Ops;
16221 for (SDValue Op : BV->op_values()) {
16222 // If the vector element type is not legal, the BUILD_VECTOR operands
16223 // are promoted and implicitly truncated. Make that explicit here.
16224 if (Op.getValueType() != SrcEltVT)
16225 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
16226 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
16227 AddToWorklist(Ops.back().getNode());
16228 }
16229 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
16230 BV->getValueType(0).getVectorNumElements());
16231 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
16232 }
16233
16234 // Otherwise, we're growing or shrinking the elements. To avoid having to
16235 // handle annoying details of growing/shrinking FP values, we convert them to
16236 // int first.
16237 if (SrcEltVT.isFloatingPoint()) {
16238 // Convert the input float vector to a int vector where the elements are the
16239 // same sizes.
16240 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
16241 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
16242 SrcEltVT = IntVT;
16243 }
16244
16245 // Now we know the input is an integer vector. If the output is a FP type,
16246 // convert to integer first, then to FP of the right size.
16247 if (DstEltVT.isFloatingPoint()) {
16248 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
16249 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
16250
16251 // Next, convert to FP elements of the same size.
16252 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
16253 }
16254
16255 // Okay, we know the src/dst types are both integers of differing types.
16256 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
16257
16258 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
16259 // BuildVectorSDNode?
16260 auto *BVN = cast<BuildVectorSDNode>(BV);
16261
16262 // Extract the constant raw bit data.
16263 BitVector UndefElements;
16264 SmallVector<APInt> RawBits;
16265 bool IsLE = DAG.getDataLayout().isLittleEndian();
16266 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16267 return SDValue();
16268
16269 SDLoc DL(BV);
16270 SmallVector<SDValue, 8> Ops;
16271 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
16272 if (UndefElements[I])
16273 Ops.push_back(DAG.getUNDEF(DstEltVT));
16274 else
16275 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
16276 }
16277
16278 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
16279 return DAG.getBuildVector(VT, DL, Ops);
16280}
16281
16282// Returns true if floating point contraction is allowed on the FMUL-SDValue
16283// `N`
16284static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16285 assert(N.getOpcode() == ISD::FMUL);
16286
16287 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
16288 N->getFlags().hasAllowContract();
16289}
16290
16291// Returns true if `N` can assume no infinities involved in its computation.
16292static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16293 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16294}
16295
16296/// Try to perform FMA combining on a given FADD node.
16297template <class MatchContextClass>
16298SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16299 SDValue N0 = N->getOperand(0);
16300 SDValue N1 = N->getOperand(1);
16301 EVT VT = N->getValueType(0);
16302 SDLoc SL(N);
16303 MatchContextClass matcher(DAG, TLI, N);
16304 const TargetOptions &Options = DAG.getTarget().Options;
16305
16306 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16307
16308 // Floating-point multiply-add with intermediate rounding.
16309 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16310 // FIXME: Add VP_FMAD opcode.
16311 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16312
16313 // Floating-point multiply-add without intermediate rounding.
16314 bool HasFMA =
16315 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16316 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16317
16318 // No valid opcode, do not combine.
16319 if (!HasFMAD && !HasFMA)
16320 return SDValue();
16321
16322 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16323 Options.UnsafeFPMath || HasFMAD);
16324 // If the addition is not contractable, do not combine.
16325 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16326 return SDValue();
16327
16328 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16329 // beneficial. It does not reduce latency. It increases register pressure. It
16330 // replaces an fadd with an fma which is a more complex instruction, so is
16331 // likely to have a larger encoding, use more functional units, etc.
16332 if (N0 == N1)
16333 return SDValue();
16334
16335 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16336 return SDValue();
16337
16338 // Always prefer FMAD to FMA for precision.
16339 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16340 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16341
16342 auto isFusedOp = [&](SDValue N) {
16343 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16344 };
16345
16346 // Is the node an FMUL and contractable either due to global flags or
16347 // SDNodeFlags.
16348 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16349 if (!matcher.match(N, ISD::FMUL))
16350 return false;
16351 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16352 };
16353 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
16354 // prefer to fold the multiply with fewer uses.
16356 if (N0->use_size() > N1->use_size())
16357 std::swap(N0, N1);
16358 }
16359
16360 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
16361 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
16362 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
16363 N0.getOperand(1), N1);
16364 }
16365
16366 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
16367 // Note: Commutes FADD operands.
16368 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
16369 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
16370 N1.getOperand(1), N0);
16371 }
16372
16373 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
16374 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
16375 // This also works with nested fma instructions:
16376 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
16377 // fma A, B, (fma C, D, fma (E, F, G))
16378 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
16379 // fma A, B, (fma C, D, fma (E, F, G)).
16380 // This requires reassociation because it changes the order of operations.
16381 bool CanReassociate =
16382 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16383 if (CanReassociate) {
16384 SDValue FMA, E;
16385 if (isFusedOp(N0) && N0.hasOneUse()) {
16386 FMA = N0;
16387 E = N1;
16388 } else if (isFusedOp(N1) && N1.hasOneUse()) {
16389 FMA = N1;
16390 E = N0;
16391 }
16392
16393 SDValue TmpFMA = FMA;
16394 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
16395 SDValue FMul = TmpFMA->getOperand(2);
16396 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
16397 SDValue C = FMul.getOperand(0);
16398 SDValue D = FMul.getOperand(1);
16399 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
16400 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
16401 // Replacing the inner FMul could cause the outer FMA to be simplified
16402 // away.
16403 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
16404 }
16405
16406 TmpFMA = TmpFMA->getOperand(2);
16407 }
16408 }
16409
16410 // Look through FP_EXTEND nodes to do more combining.
16411
16412 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
16413 if (matcher.match(N0, ISD::FP_EXTEND)) {
16414 SDValue N00 = N0.getOperand(0);
16415 if (isContractableFMUL(N00) &&
16416 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16417 N00.getValueType())) {
16418 return matcher.getNode(
16419 PreferredFusedOpcode, SL, VT,
16420 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16421 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
16422 }
16423 }
16424
16425 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
16426 // Note: Commutes FADD operands.
16427 if (matcher.match(N1, ISD::FP_EXTEND)) {
16428 SDValue N10 = N1.getOperand(0);
16429 if (isContractableFMUL(N10) &&
16430 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16431 N10.getValueType())) {
16432 return matcher.getNode(
16433 PreferredFusedOpcode, SL, VT,
16434 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
16435 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16436 }
16437 }
16438
16439 // More folding opportunities when target permits.
16440 if (Aggressive) {
16441 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
16442 // -> (fma x, y, (fma (fpext u), (fpext v), z))
16443 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16444 SDValue Z) {
16445 return matcher.getNode(
16446 PreferredFusedOpcode, SL, VT, X, Y,
16447 matcher.getNode(PreferredFusedOpcode, SL, VT,
16448 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16449 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16450 };
16451 if (isFusedOp(N0)) {
16452 SDValue N02 = N0.getOperand(2);
16453 if (matcher.match(N02, ISD::FP_EXTEND)) {
16454 SDValue N020 = N02.getOperand(0);
16455 if (isContractableFMUL(N020) &&
16456 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16457 N020.getValueType())) {
16458 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
16459 N020.getOperand(0), N020.getOperand(1),
16460 N1);
16461 }
16462 }
16463 }
16464
16465 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16466 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16467 // FIXME: This turns two single-precision and one double-precision
16468 // operation into two double-precision operations, which might not be
16469 // interesting for all targets, especially GPUs.
16470 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16471 SDValue Z) {
16472 return matcher.getNode(
16473 PreferredFusedOpcode, SL, VT,
16474 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16475 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16476 matcher.getNode(PreferredFusedOpcode, SL, VT,
16477 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16478 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16479 };
16480 if (N0.getOpcode() == ISD::FP_EXTEND) {
16481 SDValue N00 = N0.getOperand(0);
16482 if (isFusedOp(N00)) {
16483 SDValue N002 = N00.getOperand(2);
16484 if (isContractableFMUL(N002) &&
16485 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16486 N00.getValueType())) {
16487 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16488 N002.getOperand(0), N002.getOperand(1),
16489 N1);
16490 }
16491 }
16492 }
16493
16494 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16495 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16496 if (isFusedOp(N1)) {
16497 SDValue N12 = N1.getOperand(2);
16498 if (N12.getOpcode() == ISD::FP_EXTEND) {
16499 SDValue N120 = N12.getOperand(0);
16500 if (isContractableFMUL(N120) &&
16501 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16502 N120.getValueType())) {
16503 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16504 N120.getOperand(0), N120.getOperand(1),
16505 N0);
16506 }
16507 }
16508 }
16509
16510 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16511 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16512 // FIXME: This turns two single-precision and one double-precision
16513 // operation into two double-precision operations, which might not be
16514 // interesting for all targets, especially GPUs.
16515 if (N1.getOpcode() == ISD::FP_EXTEND) {
16516 SDValue N10 = N1.getOperand(0);
16517 if (isFusedOp(N10)) {
16518 SDValue N102 = N10.getOperand(2);
16519 if (isContractableFMUL(N102) &&
16520 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16521 N10.getValueType())) {
16522 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16523 N102.getOperand(0), N102.getOperand(1),
16524 N0);
16525 }
16526 }
16527 }
16528 }
16529
16530 return SDValue();
16531}
16532
16533/// Try to perform FMA combining on a given FSUB node.
16534template <class MatchContextClass>
16535SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16536 SDValue N0 = N->getOperand(0);
16537 SDValue N1 = N->getOperand(1);
16538 EVT VT = N->getValueType(0);
16539 SDLoc SL(N);
16540 MatchContextClass matcher(DAG, TLI, N);
16541 const TargetOptions &Options = DAG.getTarget().Options;
16542
16543 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16544
16545 // Floating-point multiply-add with intermediate rounding.
16546 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16547 // FIXME: Add VP_FMAD opcode.
16548 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16549
16550 // Floating-point multiply-add without intermediate rounding.
16551 bool HasFMA =
16552 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16553       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16554 
16555 // No valid opcode, do not combine.
16556 if (!HasFMAD && !HasFMA)
16557 return SDValue();
16558
16559 const SDNodeFlags Flags = N->getFlags();
16560 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16561 Options.UnsafeFPMath || HasFMAD);
16562
16563 // If the subtraction is not contractable, do not combine.
16564 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16565 return SDValue();
16566
16567 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16568 return SDValue();
16569
16570 // Always prefer FMAD to FMA for precision.
16571 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16572   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16573   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16574
16575 // Is the node an FMUL and contractable either due to global flags or
16576 // SDNodeFlags.
16577 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16578 if (!matcher.match(N, ISD::FMUL))
16579 return false;
16580 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16581 };
16582
16583 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16584 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16585 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16586 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16587 XY.getOperand(1),
16588 matcher.getNode(ISD::FNEG, SL, VT, Z));
16589 }
16590 return SDValue();
16591 };
16592
16593 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16594 // Note: Commutes FSUB operands.
16595 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16596 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16597 return matcher.getNode(
16598 PreferredFusedOpcode, SL, VT,
16599 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16600 YZ.getOperand(1), X);
16601 }
16602 return SDValue();
16603 };
16604
16605 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16606 // prefer to fold the multiply with fewer uses.
16607 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16608 (N0->use_size() > N1->use_size())) {
16609 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16610 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16611 return V;
16612 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16613 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16614 return V;
16615 } else {
16616 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16617 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16618 return V;
16619 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16620 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16621 return V;
16622 }
16623
16624   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
16625 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16626 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16627 SDValue N00 = N0.getOperand(0).getOperand(0);
16628 SDValue N01 = N0.getOperand(0).getOperand(1);
16629 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16630 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16631 matcher.getNode(ISD::FNEG, SL, VT, N1));
16632 }
16633
16634 // Look through FP_EXTEND nodes to do more combining.
16635
16636 // fold (fsub (fpext (fmul x, y)), z)
16637 // -> (fma (fpext x), (fpext y), (fneg z))
16638 if (matcher.match(N0, ISD::FP_EXTEND)) {
16639 SDValue N00 = N0.getOperand(0);
16640 if (isContractableFMUL(N00) &&
16641 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16642 N00.getValueType())) {
16643 return matcher.getNode(
16644 PreferredFusedOpcode, SL, VT,
16645 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16646 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16647 matcher.getNode(ISD::FNEG, SL, VT, N1));
16648 }
16649 }
16650
16651 // fold (fsub x, (fpext (fmul y, z)))
16652 // -> (fma (fneg (fpext y)), (fpext z), x)
16653 // Note: Commutes FSUB operands.
16654 if (matcher.match(N1, ISD::FP_EXTEND)) {
16655 SDValue N10 = N1.getOperand(0);
16656 if (isContractableFMUL(N10) &&
16657 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16658 N10.getValueType())) {
16659 return matcher.getNode(
16660 PreferredFusedOpcode, SL, VT,
16661 matcher.getNode(
16662 ISD::FNEG, SL, VT,
16663 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16664 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16665 }
16666 }
16667
16668   // fold (fsub (fpext (fneg (fmul x, y))), z)
16669   // -> (fneg (fma (fpext x), (fpext y), z))
16670   // Note: This could be removed with appropriate canonicalization of the
16671   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16672   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16673   // us from implementing the canonicalization in visitFSUB.
16674 if (matcher.match(N0, ISD::FP_EXTEND)) {
16675 SDValue N00 = N0.getOperand(0);
16676 if (matcher.match(N00, ISD::FNEG)) {
16677 SDValue N000 = N00.getOperand(0);
16678 if (isContractableFMUL(N000) &&
16679 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16680 N00.getValueType())) {
16681 return matcher.getNode(
16682 ISD::FNEG, SL, VT,
16683 matcher.getNode(
16684 PreferredFusedOpcode, SL, VT,
16685 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16686 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16687 N1));
16688 }
16689 }
16690 }
16691
16692   // fold (fsub (fneg (fpext (fmul x, y))), z)
16693   // -> (fneg (fma (fpext x), (fpext y), z))
16694   // Note: This could be removed with appropriate canonicalization of the
16695   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16696   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16697   // us from implementing the canonicalization in visitFSUB.
16698 if (matcher.match(N0, ISD::FNEG)) {
16699 SDValue N00 = N0.getOperand(0);
16700 if (matcher.match(N00, ISD::FP_EXTEND)) {
16701 SDValue N000 = N00.getOperand(0);
16702 if (isContractableFMUL(N000) &&
16703 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16704 N000.getValueType())) {
16705 return matcher.getNode(
16706 ISD::FNEG, SL, VT,
16707 matcher.getNode(
16708 PreferredFusedOpcode, SL, VT,
16709 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16710 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16711 N1));
16712 }
16713 }
16714 }
16715
16716 auto isReassociable = [&Options](SDNode *N) {
16717 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16718 };
16719
16720 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16721 &isReassociable](SDValue N) {
16722 return isContractableFMUL(N) && isReassociable(N.getNode());
16723 };
16724
16725 auto isFusedOp = [&](SDValue N) {
16726 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16727 };
16728
16729 // More folding opportunities when target permits.
16730 if (Aggressive && isReassociable(N)) {
16731 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16732 // fold (fsub (fma x, y, (fmul u, v)), z)
16733     // -> (fma x, y, (fma u, v, (fneg z)))
16734 if (CanFuse && isFusedOp(N0) &&
16735 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16736 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16737 return matcher.getNode(
16738 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16739 matcher.getNode(PreferredFusedOpcode, SL, VT,
16740 N0.getOperand(2).getOperand(0),
16741 N0.getOperand(2).getOperand(1),
16742 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16743 }
16744
16745 // fold (fsub x, (fma y, z, (fmul u, v)))
16746 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16747 if (CanFuse && isFusedOp(N1) &&
16748 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16749 N1->hasOneUse() && NoSignedZero) {
16750 SDValue N20 = N1.getOperand(2).getOperand(0);
16751 SDValue N21 = N1.getOperand(2).getOperand(1);
16752 return matcher.getNode(
16753 PreferredFusedOpcode, SL, VT,
16754 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16755 N1.getOperand(1),
16756 matcher.getNode(PreferredFusedOpcode, SL, VT,
16757 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16758 }
16759
16760 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16761     // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16762 if (isFusedOp(N0) && N0->hasOneUse()) {
16763 SDValue N02 = N0.getOperand(2);
16764 if (matcher.match(N02, ISD::FP_EXTEND)) {
16765 SDValue N020 = N02.getOperand(0);
16766 if (isContractableAndReassociableFMUL(N020) &&
16767 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16768 N020.getValueType())) {
16769 return matcher.getNode(
16770 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16771 matcher.getNode(
16772 PreferredFusedOpcode, SL, VT,
16773 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16774 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16775 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16776 }
16777 }
16778 }
16779
16780 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16781 // -> (fma (fpext x), (fpext y),
16782 // (fma (fpext u), (fpext v), (fneg z)))
16783 // FIXME: This turns two single-precision and one double-precision
16784 // operation into two double-precision operations, which might not be
16785 // interesting for all targets, especially GPUs.
16786 if (matcher.match(N0, ISD::FP_EXTEND)) {
16787 SDValue N00 = N0.getOperand(0);
16788 if (isFusedOp(N00)) {
16789 SDValue N002 = N00.getOperand(2);
16790 if (isContractableAndReassociableFMUL(N002) &&
16791 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16792 N00.getValueType())) {
16793 return matcher.getNode(
16794 PreferredFusedOpcode, SL, VT,
16795 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16796 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16797 matcher.getNode(
16798 PreferredFusedOpcode, SL, VT,
16799 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16800 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16801 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16802 }
16803 }
16804 }
16805
16806 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16807 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16808 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16809 N1->hasOneUse()) {
16810 SDValue N120 = N1.getOperand(2).getOperand(0);
16811 if (isContractableAndReassociableFMUL(N120) &&
16812 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16813 N120.getValueType())) {
16814 SDValue N1200 = N120.getOperand(0);
16815 SDValue N1201 = N120.getOperand(1);
16816 return matcher.getNode(
16817 PreferredFusedOpcode, SL, VT,
16818 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16819 N1.getOperand(1),
16820 matcher.getNode(
16821 PreferredFusedOpcode, SL, VT,
16822 matcher.getNode(ISD::FNEG, SL, VT,
16823 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16824 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16825 }
16826 }
16827
16828 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16829 // -> (fma (fneg (fpext y)), (fpext z),
16830 // (fma (fneg (fpext u)), (fpext v), x))
16831 // FIXME: This turns two single-precision and one double-precision
16832 // operation into two double-precision operations, which might not be
16833 // interesting for all targets, especially GPUs.
16834 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16835 SDValue CvtSrc = N1.getOperand(0);
16836 SDValue N100 = CvtSrc.getOperand(0);
16837 SDValue N101 = CvtSrc.getOperand(1);
16838 SDValue N102 = CvtSrc.getOperand(2);
16839 if (isContractableAndReassociableFMUL(N102) &&
16840 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16841 CvtSrc.getValueType())) {
16842 SDValue N1020 = N102.getOperand(0);
16843 SDValue N1021 = N102.getOperand(1);
16844 return matcher.getNode(
16845 PreferredFusedOpcode, SL, VT,
16846 matcher.getNode(ISD::FNEG, SL, VT,
16847 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16848 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16849 matcher.getNode(
16850 PreferredFusedOpcode, SL, VT,
16851 matcher.getNode(ISD::FNEG, SL, VT,
16852 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16853 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16854 }
16855 }
16856 }
16857
16858 return SDValue();
16859}
16860
16861/// Try to perform FMA combining on a given FMUL node based on the distributive
16862/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16863/// subtraction instead of addition).
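/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y), and
/// (fmul (fsub 1.0, x), y) becomes (fma (fneg x), y, y).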
16864SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16865 SDValue N0 = N->getOperand(0);
16866 SDValue N1 = N->getOperand(1);
16867 EVT VT = N->getValueType(0);
16868 SDLoc SL(N);
16869
16870 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16871
16872 const TargetOptions &Options = DAG.getTarget().Options;
16873
16874 // The transforms below are incorrect when x == 0 and y == inf, because the
16875 // intermediate multiplication produces a nan.
16876 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16877 if (!hasNoInfs(Options, FAdd))
16878 return SDValue();
16879
16880 // Floating-point multiply-add without intermediate rounding.
16881 bool HasFMA =
16882       isContractableFMUL(Options, SDValue(N, 0)) &&
16883       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16884       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16885 
16886 // Floating-point multiply-add with intermediate rounding. This can result
16887 // in a less precise result due to the changed rounding order.
16888 bool HasFMAD = Options.UnsafeFPMath &&
16889 (LegalOperations && TLI.isFMADLegal(DAG, N));
16890
16891 // No valid opcode, do not combine.
16892 if (!HasFMAD && !HasFMA)
16893 return SDValue();
16894
16895 // Always prefer FMAD to FMA for precision.
16896 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16897   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16898 
16899 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16900 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16901 auto FuseFADD = [&](SDValue X, SDValue Y) {
16902 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16903 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16904 if (C->isExactlyValue(+1.0))
16905 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16906 Y);
16907 if (C->isExactlyValue(-1.0))
16908 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16909 DAG.getNode(ISD::FNEG, SL, VT, Y));
16910 }
16911 }
16912 return SDValue();
16913 };
16914
16915 if (SDValue FMA = FuseFADD(N0, N1))
16916 return FMA;
16917 if (SDValue FMA = FuseFADD(N1, N0))
16918 return FMA;
16919
16920 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16921 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16922 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16923 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16924 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16925 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16926 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16927 if (C0->isExactlyValue(+1.0))
16928 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16929 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16930 Y);
16931 if (C0->isExactlyValue(-1.0))
16932 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16933 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16934 DAG.getNode(ISD::FNEG, SL, VT, Y));
16935 }
16936 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16937 if (C1->isExactlyValue(+1.0))
16938 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16939 DAG.getNode(ISD::FNEG, SL, VT, Y));
16940 if (C1->isExactlyValue(-1.0))
16941 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16942 Y);
16943 }
16944 }
16945 return SDValue();
16946 };
16947
16948 if (SDValue FMA = FuseFSUB(N0, N1))
16949 return FMA;
16950 if (SDValue FMA = FuseFSUB(N1, N0))
16951 return FMA;
16952
16953 return SDValue();
16954}
16955
16956SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16957 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16958
16959 // FADD -> FMA combines:
16960 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16961 if (Fused.getOpcode() != ISD::DELETED_NODE)
16962 AddToWorklist(Fused.getNode());
16963 return Fused;
16964 }
16965 return SDValue();
16966}
16967
16968SDValue DAGCombiner::visitFADD(SDNode *N) {
16969 SDValue N0 = N->getOperand(0);
16970 SDValue N1 = N->getOperand(1);
16971 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16972 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16973 EVT VT = N->getValueType(0);
16974 SDLoc DL(N);
16975 const TargetOptions &Options = DAG.getTarget().Options;
16976 SDNodeFlags Flags = N->getFlags();
16977 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16978
16979 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16980 return R;
16981
16982 // fold (fadd c1, c2) -> c1 + c2
16983 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16984 return C;
16985
16986 // canonicalize constant to RHS
16987 if (N0CFP && !N1CFP)
16988 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16989
16990 // fold vector ops
16991 if (VT.isVector())
16992 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16993 return FoldedVOp;
16994
16995 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16996 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16997 if (N1C && N1C->isZero())
16998 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16999 return N0;
17000
17001 if (SDValue NewSel = foldBinOpIntoSelect(N))
17002 return NewSel;
17003
17004 // fold (fadd A, (fneg B)) -> (fsub A, B)
17005 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17006 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17007 N1, DAG, LegalOperations, ForCodeSize))
17008 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17009
17010 // fold (fadd (fneg A), B) -> (fsub B, A)
17011 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17012 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17013 N0, DAG, LegalOperations, ForCodeSize))
17014 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17015
17016 auto isFMulNegTwo = [](SDValue FMul) {
17017 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17018 return false;
17019 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17020 return C && C->isExactlyValue(-2.0);
17021 };
17022
17023 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17024 if (isFMulNegTwo(N0)) {
17025 SDValue B = N0.getOperand(0);
17026 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17027 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17028 }
17029 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17030 if (isFMulNegTwo(N1)) {
17031 SDValue B = N1.getOperand(0);
17032 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17033 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17034 }
17035
17036 // No FP constant should be created after legalization as Instruction
17037 // Selection pass has a hard time dealing with FP constants.
17038 bool AllowNewConst = (Level < AfterLegalizeDAG);
17039
17040 // If nnan is enabled, fold lots of things.
17041 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17042 // If allowed, fold (fadd (fneg x), x) -> 0.0
17043 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17044 return DAG.getConstantFP(0.0, DL, VT);
17045
17046 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17047 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17048 return DAG.getConstantFP(0.0, DL, VT);
17049 }
17050
17051 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17052 // TODO: break out portions of the transformations below for which Unsafe is
17053 // considered and which do not require both nsz and reassoc
17054 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17055 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17056 AllowNewConst) {
17057 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17058 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17059         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17060       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17061 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17062 }
17063
17064 // We can fold chains of FADD's of the same value into multiplications.
17065 // This transform is not safe in general because we are reducing the number
17066 // of rounding steps.
17067 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17068 if (N0.getOpcode() == ISD::FMUL) {
17069 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17070 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17071
17072 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17073 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17074 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17075 DAG.getConstantFP(1.0, DL, VT));
17076 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17077 }
17078
17079 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17080 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17081 N1.getOperand(0) == N1.getOperand(1) &&
17082 N0.getOperand(0) == N1.getOperand(0)) {
17083 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17084 DAG.getConstantFP(2.0, DL, VT));
17085 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17086 }
17087 }
17088
17089 if (N1.getOpcode() == ISD::FMUL) {
17090 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17091 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17092
17093 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17094 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17095 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17096 DAG.getConstantFP(1.0, DL, VT));
17097 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17098 }
17099
17100 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17101 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17102 N0.getOperand(0) == N0.getOperand(1) &&
17103 N1.getOperand(0) == N0.getOperand(0)) {
17104 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17105 DAG.getConstantFP(2.0, DL, VT));
17106 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17107 }
17108 }
17109
17110 if (N0.getOpcode() == ISD::FADD) {
17111 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17112 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17113 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17114 (N0.getOperand(0) == N1)) {
17115 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17116 DAG.getConstantFP(3.0, DL, VT));
17117 }
17118 }
17119
17120 if (N1.getOpcode() == ISD::FADD) {
17121 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17122 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17123 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17124 N1.getOperand(0) == N0) {
17125 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17126 DAG.getConstantFP(3.0, DL, VT));
17127 }
17128 }
17129
17130 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17131 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17132 N0.getOperand(0) == N0.getOperand(1) &&
17133 N1.getOperand(0) == N1.getOperand(1) &&
17134 N0.getOperand(0) == N1.getOperand(0)) {
17135 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17136 DAG.getConstantFP(4.0, DL, VT));
17137 }
17138 }
17139
17140 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17141 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17142 VT, N0, N1, Flags))
17143 return SD;
17144 } // enable-unsafe-fp-math
17145
17146 // FADD -> FMA combines:
17147 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17148 if (Fused.getOpcode() != ISD::DELETED_NODE)
17149 AddToWorklist(Fused.getNode());
17150 return Fused;
17151 }
17152 return SDValue();
17153}
17154
17155SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17156 SDValue Chain = N->getOperand(0);
17157 SDValue N0 = N->getOperand(1);
17158 SDValue N1 = N->getOperand(2);
17159 EVT VT = N->getValueType(0);
17160 EVT ChainVT = N->getValueType(1);
17161 SDLoc DL(N);
17162 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17163
17164 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17165 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17166 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17167 N1, DAG, LegalOperations, ForCodeSize)) {
17168 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17169 {Chain, N0, NegN1});
17170 }
17171
17172 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17173 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17174 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17175 N0, DAG, LegalOperations, ForCodeSize)) {
17176 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17177 {Chain, N1, NegN0});
17178 }
17179 return SDValue();
17180}
17181
17182SDValue DAGCombiner::visitFSUB(SDNode *N) {
17183 SDValue N0 = N->getOperand(0);
17184 SDValue N1 = N->getOperand(1);
17185 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17186 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17187 EVT VT = N->getValueType(0);
17188 SDLoc DL(N);
17189 const TargetOptions &Options = DAG.getTarget().Options;
17190 const SDNodeFlags Flags = N->getFlags();
17191 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17192
17193 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17194 return R;
17195
17196 // fold (fsub c1, c2) -> c1-c2
17197 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17198 return C;
17199
17200 // fold vector ops
17201 if (VT.isVector())
17202 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17203 return FoldedVOp;
17204
17205 if (SDValue NewSel = foldBinOpIntoSelect(N))
17206 return NewSel;
17207
17208 // (fsub A, 0) -> A
17209 if (N1CFP && N1CFP->isZero()) {
17210 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17211 Flags.hasNoSignedZeros()) {
17212 return N0;
17213 }
17214 }
17215
17216 if (N0 == N1) {
17217 // (fsub x, x) -> 0.0
17218 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17219 return DAG.getConstantFP(0.0f, DL, VT);
17220 }
17221
17222 // (fsub -0.0, N1) -> -N1
17223 if (N0CFP && N0CFP->isZero()) {
17224 if (N0CFP->isNegative() ||
17225 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17226 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17227 // flushed to zero, unless all users treat denorms as zero (DAZ).
17228 // FIXME: This transform will change the sign of a NaN and the behavior
17229 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17230 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17231 if (DenormMode == DenormalMode::getIEEE()) {
17232 if (SDValue NegN1 =
17233 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17234 return NegN1;
17235 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17236 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17237 }
17238 }
17239 }
17240
17241 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
17242 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17243 N1.getOpcode() == ISD::FADD) {
17244 // X - (X + Y) -> -Y
17245 if (N0 == N1->getOperand(0))
17246 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17247 // X - (Y + X) -> -Y
17248 if (N0 == N1->getOperand(1))
17249 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17250 }
17251
17252 // fold (fsub A, (fneg B)) -> (fadd A, B)
17253 if (SDValue NegN1 =
17254 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17255 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17256
17257 // FSUB -> FMA combines:
17258 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17259 AddToWorklist(Fused.getNode());
17260 return Fused;
17261 }
17262
17263 return SDValue();
17264}
17265
17266// Transform IEEE Floats:
17267// (fmul C, (uitofp Pow2))
17268// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17269// (fdiv C, (uitofp Pow2))
17270// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17271//
17272 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
17273// there is no need for more than an add/sub.
17274//
17275// This is valid under the following circumstances:
17276// 1) We are dealing with IEEE floats
17277// 2) C is normal
17278// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17279 // TODO: Much of this could also be used for generating `ldexp` on targets that
17280// prefer it.
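// For example, C = 3.0f has bit pattern 0x40400000; multiplying by Pow2 = 4
// (Log2 = 2) adds 2 << 23 to that pattern, giving 0x41400000, which is
// exactly 12.0f.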
17281SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17282 EVT VT = N->getValueType(0);
17283 SDValue ConstOp, Pow2Op;
17284
17285 std::optional<int> Mantissa;
17286 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17287 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17288 return false;
17289
17290 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17291 Pow2Op = N->getOperand(1 - ConstOpIdx);
17292 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17293 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17294 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17295 return false;
17296
17297 Pow2Op = Pow2Op.getOperand(0);
17298
17299 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17300 // TODO: We could use knownbits to make this bound more precise.
17301 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17302
17303 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17304 if (CFP == nullptr)
17305 return false;
17306
17307 const APFloat &APF = CFP->getValueAPF();
17308
17309 // Make sure we have normal/ieee constant.
17310 if (!APF.isNormal() || !APF.isIEEE())
17311 return false;
17312
17313       // Make sure the float's exponent is within the bounds for which this
17314       // transform produces a bitwise-equal value.
17315 int CurExp = ilogb(APF);
17316 // FMul by pow2 will only increase exponent.
17317 int MinExp =
17318 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17319 // FDiv by pow2 will only decrease exponent.
17320 int MaxExp =
17321 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17322 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17323           MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
17324         return false;
17325
17326 // Finally make sure we actually know the mantissa for the float type.
17327 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17328 if (!Mantissa)
17329 Mantissa = ThisMantissa;
17330
17331 return *Mantissa == ThisMantissa && ThisMantissa > 0;
17332 };
17333
17334 // TODO: We may be able to include undefs.
17335 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
17336 };
17337
17338 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
17339 return SDValue();
17340
17341 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
17342 return SDValue();
17343
17344 // Get log2 after all other checks have taken place. This is because
17345 // BuildLogBase2 may create a new node.
17346 SDLoc DL(N);
17347 // Get Log2 type with same bitwidth as the float type (VT).
17348 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
17349 if (VT.isVector())
17350 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
17351                                 VT.getVectorElementCount());
17352 
17353 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
17354 /*InexpensiveOnly*/ true, NewIntVT);
17355 if (!Log2)
17356 return SDValue();
17357
17358 // Perform actual transform.
17359 SDValue MantissaShiftCnt =
17360 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
17361 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
17362 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
17363   // cast. We could implement that by handling the casts here.
17364 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
17365 SDValue ResAsInt =
17366 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
17367 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
17368 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
17369 return ResAsFP;
17370}
17371
17372SDValue DAGCombiner::visitFMUL(SDNode *N) {
17373 SDValue N0 = N->getOperand(0);
17374 SDValue N1 = N->getOperand(1);
17375 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17376 EVT VT = N->getValueType(0);
17377 SDLoc DL(N);
17378 const TargetOptions &Options = DAG.getTarget().Options;
17379 const SDNodeFlags Flags = N->getFlags();
17380 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17381
17382 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17383 return R;
17384
17385 // fold (fmul c1, c2) -> c1*c2
17386 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
17387 return C;
17388
17389 // canonicalize constant to RHS
17390   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17391       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17392     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
17393
17394 // fold vector ops
17395 if (VT.isVector())
17396 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17397 return FoldedVOp;
17398
17399 if (SDValue NewSel = foldBinOpIntoSelect(N))
17400 return NewSel;
17401
17402 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
17403 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
17404     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17405         N0.getOpcode() == ISD::FMUL) {
17406 SDValue N00 = N0.getOperand(0);
17407 SDValue N01 = N0.getOperand(1);
17408 // Avoid an infinite loop by making sure that N00 is not a constant
17409 // (the inner multiply has not been constant folded yet).
17410       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
17411           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
17412         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
17413 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
17414 }
17415 }
17416
17417 // Match a special-case: we convert X * 2.0 into fadd.
17418 // fmul (fadd X, X), C -> fmul X, 2.0 * C
17419 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
17420 N0.getOperand(0) == N0.getOperand(1)) {
17421 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
17422 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
17423 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
17424 }
17425
17426 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
17427 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
17428 VT, N0, N1, Flags))
17429 return SD;
17430 }
17431
17432 // fold (fmul X, 2.0) -> (fadd X, X)
17433 if (N1CFP && N1CFP->isExactlyValue(+2.0))
17434 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
17435
17436 // fold (fmul X, -1.0) -> (fsub -0.0, X)
17437 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
17438 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
17439 return DAG.getNode(ISD::FSUB, DL, VT,
17440 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
17441 }
17442 }
17443
17444 // -N0 * -N1 --> N0 * N1
17445   TargetLowering::NegatibleCost CostN0 =
17446       TargetLowering::NegatibleCost::Expensive;
17447   TargetLowering::NegatibleCost CostN1 =
17448       TargetLowering::NegatibleCost::Expensive;
17449   SDValue NegN0 =
17450 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17451 if (NegN0) {
17452 HandleSDNode NegN0Handle(NegN0);
17453 SDValue NegN1 =
17454 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17455 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17456                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17457       return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17458 }
17459
17460 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17461 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17462 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17463 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17464 TLI.isOperationLegal(ISD::FABS, VT)) {
17465 SDValue Select = N0, X = N1;
17466 if (Select.getOpcode() != ISD::SELECT)
17467 std::swap(Select, X);
17468
17469 SDValue Cond = Select.getOperand(0);
17470 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17471 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17472
17473 if (TrueOpnd && FalseOpnd &&
17474 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17475 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17476 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17477 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17478 switch (CC) {
17479 default: break;
17480 case ISD::SETOLT:
17481 case ISD::SETULT:
17482 case ISD::SETOLE:
17483 case ISD::SETULE:
17484 case ISD::SETLT:
17485 case ISD::SETLE:
17486 std::swap(TrueOpnd, FalseOpnd);
17487 [[fallthrough]];
17488 case ISD::SETOGT:
17489 case ISD::SETUGT:
17490 case ISD::SETOGE:
17491 case ISD::SETUGE:
17492 case ISD::SETGT:
17493 case ISD::SETGE:
17494 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17495 TLI.isOperationLegal(ISD::FNEG, VT))
17496 return DAG.getNode(ISD::FNEG, DL, VT,
17497 DAG.getNode(ISD::FABS, DL, VT, X));
17498 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17499 return DAG.getNode(ISD::FABS, DL, VT, X);
17500
17501 break;
17502 }
17503 }
17504 }
17505
17506 // FMUL -> FMA combines:
17507 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17508 AddToWorklist(Fused.getNode());
17509 return Fused;
17510 }
17511
17512 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17513 // able to run.
17514 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17515 return R;
17516
17517 return SDValue();
17518}
17519
17520template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17521 SDValue N0 = N->getOperand(0);
17522 SDValue N1 = N->getOperand(1);
17523 SDValue N2 = N->getOperand(2);
17524 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17525 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17526 EVT VT = N->getValueType(0);
17527 SDLoc DL(N);
17528 const TargetOptions &Options = DAG.getTarget().Options;
17529 // FMA nodes have flags that propagate to the created nodes.
17530 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17531 MatchContextClass matcher(DAG, TLI, N);
17532
17533 // Constant fold FMA.
17534 if (SDValue C =
17535 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
17536 return C;
17537
17538 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17539   TargetLowering::NegatibleCost CostN0 =
17540       TargetLowering::NegatibleCost::Expensive;
17541   TargetLowering::NegatibleCost CostN1 =
17542       TargetLowering::NegatibleCost::Expensive;
17543   SDValue NegN0 =
17544 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17545 if (NegN0) {
17546 HandleSDNode NegN0Handle(NegN0);
17547 SDValue NegN1 =
17548 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17549 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17550                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17551       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17552 }
17553
17554 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17555 if (Options.UnsafeFPMath) {
17556 if (N0CFP && N0CFP->isZero())
17557 return N2;
17558 if (N1CFP && N1CFP->isZero())
17559 return N2;
17560 }
17561
17562 // FIXME: Support splat of constant.
17563 if (N0CFP && N0CFP->isExactlyValue(1.0))
17564 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
17565 if (N1CFP && N1CFP->isExactlyValue(1.0))
17566 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17567
17568 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17569   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17570       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17571     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
17572
17573 bool CanReassociate =
17574 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17575 if (CanReassociate) {
17576 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17577 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17578         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17579         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17580       return matcher.getNode(
17581 ISD::FMUL, DL, VT, N0,
17582 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17583 }
17584
17585 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17586 if (matcher.match(N0, ISD::FMUL) &&
17587         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17588         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17589       return matcher.getNode(
17590 ISD::FMA, DL, VT, N0.getOperand(0),
17591 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17592 }
17593 }
17594
17595 // (fma x, -1, y) -> (fadd (fneg x), y)
17596 // FIXME: Support splat of constant.
17597 if (N1CFP) {
17598 if (N1CFP->isExactlyValue(1.0))
17599 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17600
17601 if (N1CFP->isExactlyValue(-1.0) &&
17602 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17603 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17604 AddToWorklist(RHSNeg.getNode());
17605 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17606 }
17607
17608 // fma (fneg x), K, y -> fma x -K, y
17609 if (matcher.match(N0, ISD::FNEG) &&
17610         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17611          (N1.hasOneUse() &&
17612 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17613 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17614 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17615 }
17616 }
17617
17618 // FIXME: Support splat of constant.
17619 if (CanReassociate) {
17620 // (fma x, c, x) -> (fmul x, (c+1))
17621 if (N1CFP && N0 == N2) {
17622 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17623 matcher.getNode(ISD::FADD, DL, VT, N1,
17624 DAG.getConstantFP(1.0, DL, VT)));
17625 }
17626
17627 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17628 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17629 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17630 matcher.getNode(ISD::FADD, DL, VT, N1,
17631 DAG.getConstantFP(-1.0, DL, VT)));
17632 }
17633 }
17634
17635 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17636 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17637 if (!TLI.isFNegFree(VT))
17638     if (SDValue Neg = TLI.getCheaperNegatedExpression(
17639             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17640 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17641 return SDValue();
17642}
17643
17644SDValue DAGCombiner::visitFMAD(SDNode *N) {
17645 SDValue N0 = N->getOperand(0);
17646 SDValue N1 = N->getOperand(1);
17647 SDValue N2 = N->getOperand(2);
17648 EVT VT = N->getValueType(0);
17649 SDLoc DL(N);
17650
17651 // Constant fold FMAD.
17652 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
17653 return C;
17654
17655 return SDValue();
17656}
17657
17658// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17659// reciprocal.
17660// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17661// Notice that this is not always beneficial. One reason is different targets
17662// may have different costs for FDIV and FMUL, so sometimes the cost of two
17663// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17664// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17665SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17666 // TODO: Limit this transform based on optsize/minsize - it always creates at
17667 // least 1 extra instruction. But the perf win may be substantial enough
17668 // that only minsize should restrict this.
17669 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17670 const SDNodeFlags Flags = N->getFlags();
17671 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17672 return SDValue();
17673
17674 // Skip if current node is a reciprocal/fneg-reciprocal.
17675 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17676 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17677 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17678 return SDValue();
17679
17680 // Exit early if the target does not want this transform or if there can't
17681 // possibly be enough uses of the divisor to make the transform worthwhile.
17682 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17683
17684 // For splat vectors, scale the number of uses by the splat factor. If we can
17685 // convert the division into a scalar op, that will likely be much faster.
17686 unsigned NumElts = 1;
17687 EVT VT = N->getValueType(0);
17688 if (VT.isVector() && DAG.isSplatValue(N1))
17689 NumElts = VT.getVectorMinNumElements();
17690
17691 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17692 return SDValue();
17693
17694 // Find all FDIV users of the same divisor.
17695 // Use a set because duplicates may be present in the user list.
17696   SetVector<SDNode *> Users;
17697   for (auto *U : N1->users()) {
17698 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17699 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17700 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17701 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17702 U->getFlags().hasAllowReassociation() &&
17703 U->getFlags().hasNoSignedZeros())
17704 continue;
17705
17706 // This division is eligible for optimization only if global unsafe math
17707 // is enabled or if this division allows reciprocal formation.
17708 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17709 Users.insert(U);
17710 }
17711 }
17712
17713 // Now that we have the actual number of divisor uses, make sure it meets
17714 // the minimum threshold specified by the target.
17715 if ((Users.size() * NumElts) < MinUses)
17716 return SDValue();
17717
17718 SDLoc DL(N);
17719 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17720 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17721
17722 // Dividend / Divisor -> Dividend * Reciprocal
17723 for (auto *U : Users) {
17724 SDValue Dividend = U->getOperand(0);
17725 if (Dividend != FPOne) {
17726 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17727 Reciprocal, Flags);
17728 CombineTo(U, NewNode);
17729 } else if (U != Reciprocal.getNode()) {
17730 // In the absence of fast-math-flags, this user node is always the
17731 // same node as Reciprocal, but with FMF they may be different nodes.
17732 CombineTo(U, Reciprocal);
17733 }
17734 }
17735 return SDValue(N, 0); // N was replaced.
17736}
17737
17738SDValue DAGCombiner::visitFDIV(SDNode *N) {
17739 SDValue N0 = N->getOperand(0);
17740 SDValue N1 = N->getOperand(1);
17741 EVT VT = N->getValueType(0);
17742 SDLoc DL(N);
17743 const TargetOptions &Options = DAG.getTarget().Options;
17744 SDNodeFlags Flags = N->getFlags();
17745 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17746
17747 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17748 return R;
17749
17750 // fold (fdiv c1, c2) -> c1/c2
17751 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17752 return C;
17753
17754 // fold vector ops
17755 if (VT.isVector())
17756 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17757 return FoldedVOp;
17758
17759 if (SDValue NewSel = foldBinOpIntoSelect(N))
17760 return NewSel;
17761
17762   if (SDValue V = combineRepeatedFPDivisors(N))
17763     return V;
17764
17765 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17766 // the loss is acceptable with AllowReciprocal.
17767 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17768 // Compute the reciprocal 1.0 / c2.
17769 const APFloat &N1APF = N1CFP->getValueAPF();
17770 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17771     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17772     // Only do the transform if the reciprocal is a legal fp immediate that
17773 // isn't too nasty (eg NaN, denormal, ...).
17774 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17775 (st == APFloat::opInexact &&
17776 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17777 (!LegalOperations ||
17778 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17779 // backend)... we should handle this gracefully after Legalize.
17780 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17781          TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17782          TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17783 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17784 DAG.getConstantFP(Recip, DL, VT));
17785 }
17786
17787 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17788 // If this FDIV is part of a reciprocal square root, it may be folded
17789 // into a target-specific square root estimate instruction.
17790 if (N1.getOpcode() == ISD::FSQRT) {
17791 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17792 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17793 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17794 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17795 if (SDValue RV =
17796 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17797 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17798 AddToWorklist(RV.getNode());
17799 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17800 }
17801 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17802 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17803 if (SDValue RV =
17804 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17805 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17806 AddToWorklist(RV.getNode());
17807 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17808 }
17809 } else if (N1.getOpcode() == ISD::FMUL) {
17810 // Look through an FMUL. Even though this won't remove the FDIV directly,
17811 // it's still worthwhile to get rid of the FSQRT if possible.
17812 SDValue Sqrt, Y;
17813 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17814 Sqrt = N1.getOperand(0);
17815 Y = N1.getOperand(1);
17816 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17817 Sqrt = N1.getOperand(1);
17818 Y = N1.getOperand(0);
17819 }
17820 if (Sqrt.getNode()) {
17821 // If the other multiply operand is known positive, pull it into the
17822 // sqrt. That will eliminate the division if we convert to an estimate.
17823 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17824 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17825 SDValue A;
17826 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17827 A = Y.getOperand(0);
17828 else if (Y == Sqrt.getOperand(0))
17829 A = Y;
17830 if (A) {
17831 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17832 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17833 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17834 SDValue AAZ =
17835 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17836 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17837 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17838
17839 // Estimate creation failed. Clean up speculatively created nodes.
17840 recursivelyDeleteUnusedNodes(AAZ.getNode());
17841 }
17842 }
17843
17844 // We found a FSQRT, so try to make this fold:
17845 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17846 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17847 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17848 AddToWorklist(Div.getNode());
17849 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17850 }
17851 }
17852 }
17853
17854 // Fold into a reciprocal estimate and multiply instead of a real divide.
17855 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17856 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17857 return RV;
17858 }
17859
17860 // Fold X/Sqrt(X) -> Sqrt(X)
17861 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17862 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17863 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17864 return N1;
17865
17866 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17867   TargetLowering::NegatibleCost CostN0 =
17868       TargetLowering::NegatibleCost::Expensive;
17869   TargetLowering::NegatibleCost CostN1 =
17870       TargetLowering::NegatibleCost::Expensive;
17871   SDValue NegN0 =
17872 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17873 if (NegN0) {
17874 HandleSDNode NegN0Handle(NegN0);
17875 SDValue NegN1 =
17876 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17877 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17878                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17879       return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
17880 }
17881
17882 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17883 return R;
17884
17885 return SDValue();
17886}
17887
17888SDValue DAGCombiner::visitFREM(SDNode *N) {
17889 SDValue N0 = N->getOperand(0);
17890 SDValue N1 = N->getOperand(1);
17891 EVT VT = N->getValueType(0);
17892 SDNodeFlags Flags = N->getFlags();
17893 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17894 SDLoc DL(N);
17895
17896 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17897 return R;
17898
17899 // fold (frem c1, c2) -> fmod(c1,c2)
17900 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17901 return C;
17902
17903 if (SDValue NewSel = foldBinOpIntoSelect(N))
17904 return NewSel;
17905
17906 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17907 // power of 2.
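  // For example, (frem x, 4.0) becomes x - ftrunc(x / 4.0) * 4.0 (or the
  // equivalent FMA form below), with an fcopysign from x applied when signed
  // zeros must be preserved.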
17908 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17909       Flags.hasNoNaNs() && Flags.hasNoInfs() &&
17910       TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
17911       TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
17912       DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17913 bool NeedsCopySign =
17914 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17915 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17916 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17917 SDValue MLA;
17918     if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17919       MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17920 N1, N0);
17921 } else {
17922 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17923 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17924 }
17925 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17926 }
17927
17928 return SDValue();
17929}
17930
17931SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17932 SDNodeFlags Flags = N->getFlags();
17933 const TargetOptions &Options = DAG.getTarget().Options;
17934
17935 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17936 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17937 if (!Flags.hasApproximateFuncs() ||
17938 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17939 return SDValue();
17940
17941 SDValue N0 = N->getOperand(0);
17942 if (TLI.isFsqrtCheap(N0, DAG))
17943 return SDValue();
17944
17945 // FSQRT nodes have flags that propagate to the created nodes.
17946 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17947 // transform the fdiv, we may produce a sub-optimal estimate sequence
17948 // because the reciprocal calculation may not have to filter out a
17949 // 0.0 input.
17950 return buildSqrtEstimate(N0, Flags);
17951}
17952
17953/// copysign(x, fp_extend(y)) -> copysign(x, y)
17954/// copysign(x, fp_round(y)) -> copysign(x, y)
17955/// Operands to the functions are the type of X and Y respectively.
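/// For example, the sign of (fp_extend y) is the sign of y itself, so when
/// only the sign bit of the second operand is consumed, the cast can be
/// looked through.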
17956static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17957 // Always fold no-op FP casts.
17958 if (XTy == YTy)
17959 return true;
17960
17961 // Do not optimize out type conversion of f128 type yet.
17962 // For some targets like x86_64, configuration is changed to keep one f128
17963 // value in one SSE register, but instruction selection cannot handle
17964 // FCOPYSIGN on SSE registers yet.
17965 if (YTy == MVT::f128)
17966 return false;
17967
17968   return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17969 }
17970
17971 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17972   SDValue N1 = N->getOperand(1);
17973 if (N1.getOpcode() != ISD::FP_EXTEND &&
17974 N1.getOpcode() != ISD::FP_ROUND)
17975 return false;
17976 EVT N1VT = N1->getValueType(0);
17977 EVT N1Op0VT = N1->getOperand(0).getValueType();
17978 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17979}
17980
17981SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17982 SDValue N0 = N->getOperand(0);
17983 SDValue N1 = N->getOperand(1);
17984 EVT VT = N->getValueType(0);
17985 SDLoc DL(N);
17986
17987 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17988 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17989 return C;
17990
17991 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17992 const APFloat &V = N1C->getValueAPF();
17993 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17994 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17995 if (!V.isNegative()) {
17996 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17997 return DAG.getNode(ISD::FABS, DL, VT, N0);
17998 } else {
17999 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18000 return DAG.getNode(ISD::FNEG, DL, VT,
18001 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
18002 }
18003 }
18004
18005 // copysign(fabs(x), y) -> copysign(x, y)
18006 // copysign(fneg(x), y) -> copysign(x, y)
18007 // copysign(copysign(x,z), y) -> copysign(x, y)
18008 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
18009 N0.getOpcode() == ISD::FCOPYSIGN)
18010 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
18011
18012 // copysign(x, abs(y)) -> abs(x)
18013 if (N1.getOpcode() == ISD::FABS)
18014 return DAG.getNode(ISD::FABS, DL, VT, N0);
18015
18016 // copysign(x, copysign(y,z)) -> copysign(x, z)
18017 if (N1.getOpcode() == ISD::FCOPYSIGN)
18018 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
18019
18020 // copysign(x, fp_extend(y)) -> copysign(x, y)
18021 // copysign(x, fp_round(y)) -> copysign(x, y)
18022   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18023     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18024
18025 // We only take the sign bit from the sign operand.
18026 EVT SignVT = N1.getValueType();
18027 if (SimplifyDemandedBits(N1,
18028                            APInt::getSignMask(SignVT.getScalarSizeInBits())))
18029     return SDValue(N, 0);
18030
18031 // We only take the non-sign bits from the value operand
18032 if (SimplifyDemandedBits(N0,
18033                            APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
18034     return SDValue(N, 0);
18035
18036 return SDValue();
18037}
18038
18039SDValue DAGCombiner::visitFPOW(SDNode *N) {
18040 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18041 if (!ExponentC)
18042 return SDValue();
18043 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18044
18045 // Try to convert x ** (1/3) into cube root.
18046 // TODO: Handle the various flavors of long double.
18047 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18048 // Some range near 1/3 should be fine.
18049 EVT VT = N->getValueType(0);
18050 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18051 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18052 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18053 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18054 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18055 // For regular numbers, rounding may cause the results to differ.
18056 // Therefore, we require { nsz ninf nnan afn } for this transform.
18057 // TODO: We could select out the special cases if we don't have nsz/ninf.
18058 SDNodeFlags Flags = N->getFlags();
18059 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18060 !Flags.hasApproximateFuncs())
18061 return SDValue();
18062
18063 // Do not create a cbrt() libcall if the target does not have it, and do not
18064 // turn a pow that has lowering support into a cbrt() libcall.
18065 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18068 return SDValue();
18069
18070 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18071 }
18072
18073 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18074 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18075 // TODO: This could be extended (using a target hook) to handle smaller
18076 // power-of-2 fractional exponents.
18077 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18078 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18079 if (ExponentIs025 || ExponentIs075) {
18080 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18081 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18082 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18083 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18084 // For regular numbers, rounding may cause the results to differ.
18085 // Therefore, we require { nsz ninf afn } for this transform.
18086 // TODO: We could select out the special cases if we don't have nsz/ninf.
18087 SDNodeFlags Flags = N->getFlags();
18088
18089 // We only need no signed zeros for the 0.25 case.
18090 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18091 !Flags.hasApproximateFuncs())
18092 return SDValue();
18093
18094 // Don't double the number of libcalls. We are trying to inline fast code.
18096 return SDValue();
18097
18098 // Assume that libcalls are the smallest code.
18099 // TODO: This restriction should probably be lifted for vectors.
18100 if (ForCodeSize)
18101 return SDValue();
18102
18103 // pow(X, 0.25) --> sqrt(sqrt(X))
18104 SDLoc DL(N);
18105 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18106 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18107 if (ExponentIs025)
18108 return SqrtSqrt;
18109 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18110 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18111 }
18112
18113 return SDValue();
18114}
18115
18117 const TargetLowering &TLI) {
18118 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18119 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18120 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18121 // conversions would return +0.0.
18122 // FIXME: We should be able to use node-level FMF here.
18123 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
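  // For example, with X = -0.5: fptosi(-0.5) = 0 and sitofp(0) = +0.0, while
  // ftrunc(-0.5) = -0.0, so the fold is only valid if -0.0 may be ignored.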
18124 EVT VT = N->getValueType(0);
18125 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18127 return SDValue();
18128
18129 // fptosi/fptoui round towards zero, so converting from FP to integer and
18130 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
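  // For example, X = 3.7 gives fptosi(3.7) = 3 and sitofp(3) = 3.0, which is
  // exactly ftrunc(3.7).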
18131 SDValue N0 = N->getOperand(0);
18132 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18133 N0.getOperand(0).getValueType() == VT)
18134 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18135
18136 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18137 N0.getOperand(0).getValueType() == VT)
18138 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18139
18140 return SDValue();
18141}
18142
18143SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18144 SDValue N0 = N->getOperand(0);
18145 EVT VT = N->getValueType(0);
18146 EVT OpVT = N0.getValueType();
18147 SDLoc DL(N);
18148
18149 // [us]itofp(undef) = 0, because the result value is bounded.
18150 if (N0.isUndef())
18151 return DAG.getConstantFP(0.0, DL, VT);
18152
18153 // fold (sint_to_fp c1) -> c1fp
18154 // ...but only if the target supports immediate floating-point values
18155 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18156 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18157 return C;
18158
18159 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18160 // but UINT_TO_FP is legal on this target, try to convert.
18161 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18162 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18163 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18164 if (DAG.SignBitIsZero(N0))
18165 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18166 }
18167
18168 // The next optimizations are desirable only if SELECT_CC can be lowered.
18169 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18170 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18171 !VT.isVector() &&
18172 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18173 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18174 DAG.getConstantFP(0.0, DL, VT));
18175
18176 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18177 // (select (setcc x, y, cc), 1.0, 0.0)
18178 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18179 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18180 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18181 return DAG.getSelect(DL, VT, N0.getOperand(0),
18182 DAG.getConstantFP(1.0, DL, VT),
18183 DAG.getConstantFP(0.0, DL, VT));
18184
18185 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18186 return FTrunc;
18187
18188 return SDValue();
18189}
18190
18191SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18192 SDValue N0 = N->getOperand(0);
18193 EVT VT = N->getValueType(0);
18194 EVT OpVT = N0.getValueType();
18195 SDLoc DL(N);
18196
18197 // [us]itofp(undef) = 0, because the result value is bounded.
18198 if (N0.isUndef())
18199 return DAG.getConstantFP(0.0, DL, VT);
18200
18201 // fold (uint_to_fp c1) -> c1fp
18202 // ...but only if the target supports immediate floating-point values
18203 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18204 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18205 return C;
18206
18207 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18208 // but SINT_TO_FP is legal on this target, try to convert.
18209 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18210 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18211 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18212 if (DAG.SignBitIsZero(N0))
18213 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18214 }
18215
18216 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18217 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18218 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18219 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18220 DAG.getConstantFP(0.0, DL, VT));
18221
18222 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18223 return FTrunc;
18224
18225 return SDValue();
18226}
18227
18228 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18230 SDValue N0 = N->getOperand(0);
18231 EVT VT = N->getValueType(0);
18232
18233 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18234 return SDValue();
18235
18236 SDValue Src = N0.getOperand(0);
18237 EVT SrcVT = Src.getValueType();
18238 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18239 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18240
18241 // We can safely assume the conversion won't overflow the output range,
18242 // because (for example) (uint8_t)18293.f is undefined behavior.
18243
18244 // Since we can assume the conversion won't overflow, our decision as to
18245 // whether the input will fit in the float should depend on the minimum
18246 // of the input range and output range.
18247
18248 // This means this is also safe for a signed input and unsigned output, since
18249 // a negative input would lead to undefined behavior.
18250 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18251 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18252 unsigned ActualSize = std::min(InputSize, OutputSize);
18253 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18254
18255 // We can only fold away the float conversion if the input range can be
18256 // represented exactly in the float range.
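  // For example, (fp_to_sint i32 (sint_to_fp f32, i16 X)) has ActualSize =
  // min(16 - 1, 32) = 15, which fits in f32's 24-bit precision, so it folds
  // to (sign_extend i32 X); an i32 source would need 31 bits and is rejected.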
18257 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18258 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18259 unsigned ExtOp =
18260 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18261 return DAG.getNode(ExtOp, DL, VT, Src);
18262 }
18263 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18264 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18265 return DAG.getBitcast(VT, Src);
18266 }
18267 return SDValue();
18268}
18269
18270SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18271 SDValue N0 = N->getOperand(0);
18272 EVT VT = N->getValueType(0);
18273 SDLoc DL(N);
18274
18275 // fold (fp_to_sint undef) -> undef
18276 if (N0.isUndef())
18277 return DAG.getUNDEF(VT);
18278
18279 // fold (fp_to_sint c1fp) -> c1
18280 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18281 return C;
18282
18283 return FoldIntToFPToInt(N, DL, DAG);
18284}
18285
18286SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18287 SDValue N0 = N->getOperand(0);
18288 EVT VT = N->getValueType(0);
18289 SDLoc DL(N);
18290
18291 // fold (fp_to_uint undef) -> undef
18292 if (N0.isUndef())
18293 return DAG.getUNDEF(VT);
18294
18295 // fold (fp_to_uint c1fp) -> c1
18296 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18297 return C;
18298
18299 return FoldIntToFPToInt(N, DL, DAG);
18300}
18301
18302SDValue DAGCombiner::visitXROUND(SDNode *N) {
18303 SDValue N0 = N->getOperand(0);
18304 EVT VT = N->getValueType(0);
18305
18306 // fold (lrint|llrint undef) -> undef
18307 // fold (lround|llround undef) -> undef
18308 if (N0.isUndef())
18309 return DAG.getUNDEF(VT);
18310
18311 // fold (lrint|llrint c1fp) -> c1
18312 // fold (lround|llround c1fp) -> c1
18313 if (SDValue C =
18314 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18315 return C;
18316
18317 return SDValue();
18318}
18319
18320SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18321 SDValue N0 = N->getOperand(0);
18322 SDValue N1 = N->getOperand(1);
18323 EVT VT = N->getValueType(0);
18324 SDLoc DL(N);
18325
18326 // fold (fp_round c1fp) -> c1fp
18327 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18328 return C;
18329
18330 // fold (fp_round (fp_extend x)) -> x
18331 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18332 return N0.getOperand(0);
18333
18334 // fold (fp_round (fp_round x)) -> (fp_round x)
18335 if (N0.getOpcode() == ISD::FP_ROUND) {
18336 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18337 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18338
18339 // Avoid folding legal fp_rounds into non-legal ones.
18340 if (!hasOperation(ISD::FP_ROUND, VT))
18341 return SDValue();
18342
18343 // Skip this folding if it results in an fp_round from f80 to f16.
18344 //
18345 // f80 to f16 always generates an expensive (and as yet, unimplemented)
18346 // libcall to __truncxfhf2 instead of selecting native f16 conversion
18347 // instructions from f32 or f64. Moreover, the first (value-preserving)
18348 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
18349 // x86.
18350 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
18351 return SDValue();
18352
18353 // If the first fp_round isn't a value preserving truncation, it might
18354 // introduce a tie in the second fp_round, that wouldn't occur in the
18355 // single-step fp_round we want to fold to.
18356 // In other words, double rounding isn't the same as rounding.
18357 // Also, this is a value preserving truncation iff both fp_round's are.
18358 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
18359 return DAG.getNode(
18360 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
18361 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
18362 }
18363
18364 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
18365 // Note: From a legality perspective, this is a two step transform. First,
18366 // we duplicate the fp_round to the arguments of the copysign, then we
18367 // eliminate the fp_round on Y. The second step requires an additional
18368 // predicate to match the implementation above.
18369 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
18371 N0.getValueType())) {
18372 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
18373 N0.getOperand(0), N1);
18374 AddToWorklist(Tmp.getNode());
18375 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
18376 }
18377
18378 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18379 return NewVSel;
18380
18381 return SDValue();
18382}
18383
18384SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
18385 SDValue N0 = N->getOperand(0);
18386 EVT VT = N->getValueType(0);
18387 SDLoc DL(N);
18388
18389 if (VT.isVector())
18390 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
18391 return FoldedVOp;
18392
18393 // If this fp_extend's only use is an fp_round, don't fold it; let the fp_round fold us instead.
18394 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
18395 return SDValue();
18396
18397 // fold (fp_extend c1fp) -> c1fp
18398 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
18399 return C;
18400
18401 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
18402 if (N0.getOpcode() == ISD::FP16_TO_FP &&
18404 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
18405
18406 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
18407 // value of X.
18408 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
18409 SDValue In = N0.getOperand(0);
18410 if (In.getValueType() == VT) return In;
18411 if (VT.bitsLT(In.getValueType()))
18412 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
18413 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
18414 }
18415
18416 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
18417 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
18419 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
18420 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
18421 LN0->getChain(),
18422 LN0->getBasePtr(), N0.getValueType(),
18423 LN0->getMemOperand());
18424 CombineTo(N, ExtLoad);
18425 CombineTo(
18426 N0.getNode(),
18427 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
18428 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
18429 ExtLoad.getValue(1));
18430 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18431 }
18432
18433 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
18434 return NewVSel;
18435
18436 return SDValue();
18437}
18438
18439SDValue DAGCombiner::visitFCEIL(SDNode *N) {
18440 SDValue N0 = N->getOperand(0);
18441 EVT VT = N->getValueType(0);
18442
18443 // fold (fceil c1) -> fceil(c1)
18444 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
18445 return C;
18446
18447 return SDValue();
18448}
18449
18450SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18451 SDValue N0 = N->getOperand(0);
18452 EVT VT = N->getValueType(0);
18453
18454 // fold (ftrunc c1) -> ftrunc(c1)
18455 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
18456 return C;
18457
18458 // fold ftrunc (known rounded int x) -> x
18459 // ftrunc is a part of fptosi/fptoui expansion on some targets, so it is
18460 // likely to be generated to extract an integer from a rounded floating-point value.
18461 switch (N0.getOpcode()) {
18462 default: break;
18463 case ISD::FRINT:
18464 case ISD::FTRUNC:
18465 case ISD::FNEARBYINT:
18466 case ISD::FROUNDEVEN:
18467 case ISD::FFLOOR:
18468 case ISD::FCEIL:
18469 return N0;
18470 }
18471
18472 return SDValue();
18473}
18474
18475SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18476 SDValue N0 = N->getOperand(0);
18477
18478 // fold (ffrexp c1) -> ffrexp(c1)
18480 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18481 return SDValue();
18482}
18483
18484SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18485 SDValue N0 = N->getOperand(0);
18486 EVT VT = N->getValueType(0);
18487
18488 // fold (ffloor c1) -> ffloor(c1)
18489 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
18490 return C;
18491
18492 return SDValue();
18493}
18494
18495SDValue DAGCombiner::visitFNEG(SDNode *N) {
18496 SDValue N0 = N->getOperand(0);
18497 EVT VT = N->getValueType(0);
18498 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18499
18500 // Constant fold FNEG.
18501 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
18502 return C;
18503
18504 if (SDValue NegN0 =
18505 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18506 return NegN0;
18507
18508 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18509 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18510 // know it was called from a context with a nsz flag if the input fsub does
18511 // not.
18512 if (N0.getOpcode() == ISD::FSUB &&
18514 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18515 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18516 N0.getOperand(0));
18517 }
18518
18519 if (SDValue Cast = foldSignChangeInBitcast(N))
18520 return Cast;
18521
18522 return SDValue();
18523}
18524
18525SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18526 SDValue N0 = N->getOperand(0);
18527 SDValue N1 = N->getOperand(1);
18528 EVT VT = N->getValueType(0);
18529 const SDNodeFlags Flags = N->getFlags();
18530 unsigned Opc = N->getOpcode();
18531 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18532 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18533 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18534
18535 // Constant fold.
18536 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18537 return C;
18538
18539 // Canonicalize to constant on RHS.
18542 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18543
18544 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18545 const APFloat &AF = N1CFP->getValueAPF();
18546
18547 // minnum(X, nan) -> X
18548 // maxnum(X, nan) -> X
18549 // minimum(X, nan) -> nan
18550 // maximum(X, nan) -> nan
18551 if (AF.isNaN())
18552 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18553
18554 // In the following folds, inf can be replaced with the largest finite
18555 // float, if the ninf flag is set.
18556 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18557 // minnum(X, -inf) -> -inf
18558 // maxnum(X, +inf) -> +inf
18559 // minimum(X, -inf) -> -inf if nnan
18560 // maximum(X, +inf) -> +inf if nnan
18561 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18562 return N->getOperand(1);
18563
18564 // minnum(X, +inf) -> X if nnan
18565 // maxnum(X, -inf) -> X if nnan
18566 // minimum(X, +inf) -> X
18567 // maximum(X, -inf) -> X
18568 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18569 return N->getOperand(0);
18570 }
18571 }
18572
18573 if (SDValue SD = reassociateReduction(
18574 PropagatesNaN
18577 Opc, SDLoc(N), VT, N0, N1, Flags))
18578 return SD;
18579
18580 return SDValue();
18581}
18582
18583SDValue DAGCombiner::visitFABS(SDNode *N) {
18584 SDValue N0 = N->getOperand(0);
18585 EVT VT = N->getValueType(0);
18586 SDLoc DL(N);
18587
18588 // fold (fabs c1) -> fabs(c1)
18589 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
18590 return C;
18591
18592 // fold (fabs (fabs x)) -> (fabs x)
18593 if (N0.getOpcode() == ISD::FABS)
18594 return N->getOperand(0);
18595
18596 // fold (fabs (fneg x)) -> (fabs x)
18597 // fold (fabs (fcopysign x, y)) -> (fabs x)
18598 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18599 return DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18600
18601 if (SDValue Cast = foldSignChangeInBitcast(N))
18602 return Cast;
18603
18604 return SDValue();
18605}
18606
18607SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18608 SDValue Chain = N->getOperand(0);
18609 SDValue N1 = N->getOperand(1);
18610 SDValue N2 = N->getOperand(2);
18611
18612 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18613 // nondeterministic jumps).
18614 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18615 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18616 N1->getOperand(0), N2, N->getFlags());
18617 }
18618
18619 // Variant of the previous fold where there is a SETCC in between:
18620 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18621 // =>
18622 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18623 // =>
18624 // BRCOND(SETCC(X, CONST, Cond))
18625 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18626 // isn't equivalent to true or false.
18627 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18628 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18629 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18630 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18631 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18632 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18633 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18634 bool Updated = false;
18635
18636 // Is 'X Cond C' always true or false?
18637 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18638 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18639 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18640 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18641 (Cond == ISD::SETGT && C->isMaxSignedValue());
18642 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18643 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18644 (Cond == ISD::SETUGE && C->isZero()) ||
18645 (Cond == ISD::SETGE && C->isMinSignedValue());
18646 return True || False;
18647 };
18648
18649 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18650 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18651 S0 = S0->getOperand(0);
18652 Updated = true;
18653 }
18654 }
18655 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18656 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18657 S1 = S1->getOperand(0);
18658 Updated = true;
18659 }
18660 }
18661
18662 if (Updated)
18663 return DAG.getNode(
18664 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18665 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
18666 N->getFlags());
18667 }
18668
18669 // If N is a constant we could fold this into a fallthrough or unconditional
18670 // branch. However that doesn't happen very often in normal code, because
18671 // Instcombine/SimplifyCFG should have handled the available opportunities.
18672 // If we did this folding here, it would be necessary to update the
18673 // MachineBasicBlock CFG, which is awkward.
18674
18675 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18676 // on the target.
18677 if (N1.getOpcode() == ISD::SETCC &&
18679 N1.getOperand(0).getValueType())) {
18680 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18681 Chain, N1.getOperand(2),
18682 N1.getOperand(0), N1.getOperand(1), N2);
18683 }
18684
18685 if (N1.hasOneUse()) {
18686 // rebuildSetCC calls visitXor which may change the Chain when there is a
18687 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18688 HandleSDNode ChainHandle(Chain);
18689 if (SDValue NewN1 = rebuildSetCC(N1))
18690 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18691 ChainHandle.getValue(), NewN1, N2, N->getFlags());
18692 }
18693
18694 return SDValue();
18695}
18696
18697SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18698 if (N.getOpcode() == ISD::SRL ||
18699 (N.getOpcode() == ISD::TRUNCATE &&
18700 (N.getOperand(0).hasOneUse() &&
18701 N.getOperand(0).getOpcode() == ISD::SRL))) {
18702 // Look past the truncate.
18703 if (N.getOpcode() == ISD::TRUNCATE)
18704 N = N.getOperand(0);
18705
18706 // Match this pattern so that we can generate simpler code:
18707 //
18708 // %a = ...
18709 // %b = and i32 %a, 2
18710 // %c = srl i32 %b, 1
18711 // brcond i32 %c ...
18712 //
18713 // into
18714 //
18715 // %a = ...
18716 // %b = and i32 %a, 2
18717 // %c = setcc eq %b, 0
18718 // brcond %c ...
18719 //
18720 // This applies only when the AND constant value has one bit set and the
18721 // SRL constant is equal to the log2 of the AND constant. The back-end is
18722 // smart enough to convert the result into a TEST/JMP sequence.
18723 SDValue Op0 = N.getOperand(0);
18724 SDValue Op1 = N.getOperand(1);
18725
18726 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18727 SDValue AndOp1 = Op0.getOperand(1);
18728
18729 if (AndOp1.getOpcode() == ISD::Constant) {
18730 const APInt &AndConst = AndOp1->getAsAPIntVal();
18731
18732 if (AndConst.isPowerOf2() &&
18733 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18734 SDLoc DL(N);
18735 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18736 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18737 ISD::SETNE);
18738 }
18739 }
18740 }
18741 }
18742
18743 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18744 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18745 if (N.getOpcode() == ISD::XOR) {
18746 // Because we may call this on a speculatively constructed
18747 // SimplifiedSetCC Node, we need to simplify this node first.
18748 // Ideally this should be folded into SimplifySetCC and not
18749 // here. For now, grab a handle to N so we don't lose it from
18750 // replacements internal to the visit.
18751 HandleSDNode XORHandle(N);
18752 while (N.getOpcode() == ISD::XOR) {
18753 SDValue Tmp = visitXOR(N.getNode());
18754 // No simplification done.
18755 if (!Tmp.getNode())
18756 break;
18757 // Returning N is a form of in-visit replacement that may have
18758 // invalidated N. Grab the value from the handle.
18759 if (Tmp.getNode() == N.getNode())
18760 N = XORHandle.getValue();
18761 else // Node simplified. Try simplifying again.
18762 N = Tmp;
18763 }
18764
18765 if (N.getOpcode() != ISD::XOR)
18766 return N;
18767
18768 SDValue Op0 = N->getOperand(0);
18769 SDValue Op1 = N->getOperand(1);
18770
18771 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18772 bool Equal = false;
18773 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18774 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18775 Op0.getValueType() == MVT::i1) {
18776 N = Op0;
18777 Op0 = N->getOperand(0);
18778 Op1 = N->getOperand(1);
18779 Equal = true;
18780 }
18781
18782 EVT SetCCVT = N.getValueType();
18783 if (LegalTypes)
18784 SetCCVT = getSetCCResultType(SetCCVT);
18785 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
18786 // it would introduce illegal operations post-legalization as this can
18787 // result in infinite looping between converting xor->setcc here, and
18788 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
18790 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
18791 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
18792 }
18793 }
18794
18795 return SDValue();
18796}
18797
18798// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18799//
18800SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18801 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18802 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18803
18804 // If N is a constant we could fold this into a fallthrough or unconditional
18805 // branch. However that doesn't happen very often in normal code, because
18806 // Instcombine/SimplifyCFG should have handled the available opportunities.
18807 // If we did this folding here, it would be necessary to update the
18808 // MachineBasicBlock CFG, which is awkward.
18809
18810 // Use SimplifySetCC to simplify SETCC's.
18812 CondLHS, CondRHS, CC->get(), SDLoc(N),
18813 false);
18814 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18815
18816 // fold to a simpler setcc
18817 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18818 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18819 N->getOperand(0), Simp.getOperand(2),
18820 Simp.getOperand(0), Simp.getOperand(1),
18821 N->getOperand(4));
18822
18823 return SDValue();
18824}
18825
18826static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18827 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18828 const TargetLowering &TLI) {
18829 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18830 if (LD->isIndexed())
18831 return false;
18832 EVT VT = LD->getMemoryVT();
18833 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18834 return false;
18835 Ptr = LD->getBasePtr();
18836 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18837 if (ST->isIndexed())
18838 return false;
18839 EVT VT = ST->getMemoryVT();
18840 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18841 return false;
18842 Ptr = ST->getBasePtr();
18843 IsLoad = false;
18844 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18845 if (LD->isIndexed())
18846 return false;
18847 EVT VT = LD->getMemoryVT();
18848 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18849 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18850 return false;
18851 Ptr = LD->getBasePtr();
18852 IsMasked = true;
18853 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18854 if (ST->isIndexed())
18855 return false;
18856 EVT VT = ST->getMemoryVT();
18857 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18858 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18859 return false;
18860 Ptr = ST->getBasePtr();
18861 IsLoad = false;
18862 IsMasked = true;
18863 } else {
18864 return false;
18865 }
18866 return true;
18867}
18868
18869/// Try turning a load/store into a pre-indexed load/store when the base
18870/// pointer is an add or subtract and it has other uses besides the load/store.
18871/// After the transformation, the new indexed load/store has effectively folded
18872/// the add/subtract in and all of its other uses are redirected to the
18873/// new load/store.
18874bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18875 if (Level < AfterLegalizeDAG)
18876 return false;
18877
18878 bool IsLoad = true;
18879 bool IsMasked = false;
18880 SDValue Ptr;
18881 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18882 Ptr, TLI))
18883 return false;
18884
18885 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18886 // out. There is no reason to make this a preinc/predec.
18887 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18888 Ptr->hasOneUse())
18889 return false;
18890
18891 // Ask the target to do addressing mode selection.
18895 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18896 return false;
18897
18898 // Backends without true r+i pre-indexed forms may need to pass a
18899 // constant base with a variable offset so that constant coercion
18900 // will work with the patterns in canonical form.
18901 bool Swapped = false;
18902 if (isa<ConstantSDNode>(BasePtr)) {
18903 std::swap(BasePtr, Offset);
18904 Swapped = true;
18905 }
18906
18907 // Don't create an indexed load / store with zero offset.
18909 return false;
18910
18911 // Try turning it into a pre-indexed load / store except when:
18912 // 1) The new base ptr is a frame index.
18913 // 2) If N is a store and the new base ptr is either the same as or is a
18914 // predecessor of the value being stored.
18915 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18916 // that would create a cycle.
18917 // 4) All uses are load / store ops that use it as old base ptr.
18918
18919 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18920 // (plus the implicit offset) to a register to preinc anyway.
18921 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18922 return false;
18923
18924 // Check #2.
18925 if (!IsLoad) {
18926 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18927 : cast<StoreSDNode>(N)->getValue();
18928
18929 // Would require a copy.
18930 if (Val == BasePtr)
18931 return false;
18932
18933 // Would create a cycle.
18934 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18935 return false;
18936 }
18937
18938 // Caches for hasPredecessorHelper.
18941 Worklist.push_back(N);
18942
18943 // If the offset is a constant, there may be other adds of constants that
18944 // can be folded with this one. We should do this to avoid having to keep
18945 // a copy of the original base pointer.
18946 SmallVector<SDNode *, 16> OtherUses;
18948 if (isa<ConstantSDNode>(Offset))
18949 for (SDUse &Use : BasePtr->uses()) {
18950 // Skip the use that is Ptr and uses of other results from BasePtr's
18951 // node (important for nodes that return multiple results).
18952 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18953 continue;
18954
18955 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18956 MaxSteps))
18957 continue;
18958
18959 if (Use.getUser()->getOpcode() != ISD::ADD &&
18960 Use.getUser()->getOpcode() != ISD::SUB) {
18961 OtherUses.clear();
18962 break;
18963 }
18964
18965 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
18966 if (!isa<ConstantSDNode>(Op1)) {
18967 OtherUses.clear();
18968 break;
18969 }
18970
18971 // FIXME: In some cases, we can be smarter about this.
18972 if (Op1.getValueType() != Offset.getValueType()) {
18973 OtherUses.clear();
18974 break;
18975 }
18976
18977 OtherUses.push_back(Use.getUser());
18978 }
18979
18980 if (Swapped)
18981 std::swap(BasePtr, Offset);
18982
18983 // Now check for #3 and #4.
18984 bool RealUse = false;
18985
18986 for (SDNode *User : Ptr->users()) {
18987 if (User == N)
18988 continue;
18989 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
18990 return false;
18991
18992 // If Ptr may be folded into the addressing mode of another use, then it's
18993 // not profitable to do this transformation.
18994 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
18995 RealUse = true;
18996 }
18997
18998 if (!RealUse)
18999 return false;
19000
19002 if (!IsMasked) {
19003 if (IsLoad)
19004 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19005 else
19006 Result =
19007 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19008 } else {
19009 if (IsLoad)
19010 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19011 Offset, AM);
19012 else
19013 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19014 Offset, AM);
19015 }
19016 ++PreIndexedNodes;
19017 ++NodesCombined;
19018 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19019 Result.dump(&DAG); dbgs() << '\n');
19020 WorklistRemover DeadNodes(*this);
19021 if (IsLoad) {
19022 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19023 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19024 } else {
19025 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19026 }
19027
19028 // Finally, since the node is now dead, remove it from the graph.
19029 deleteAndRecombine(N);
19030
19031 if (Swapped)
19032 std::swap(BasePtr, Offset);
19033
19034 // Replace other uses of BasePtr that can be updated to use Ptr
19035 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
19036 unsigned OffsetIdx = 1;
19037 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19038 OffsetIdx = 0;
19039 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
19040 BasePtr.getNode() && "Expected BasePtr operand");
19041
19042 // We need to replace ptr0 in the following expression:
19043 // x0 * offset0 + y0 * ptr0 = t0
19044 // knowing that
19045 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19046 //
19047 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19048 // indexed load/store and the expression that needs to be re-written.
19049 //
19050 // Therefore, we have:
19051 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
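    // For example, for a PRE_INC form (x1 = y1 = 1) and another use computing
    // ptr0 + offset0 (x0 = y0 = 1), this reduces to
    // t0 = (offset0 - offset1) + t1.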
19052
19053 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19054 const APInt &Offset0 = CN->getAPIntValue();
19055 const APInt &Offset1 = Offset->getAsAPIntVal();
19056 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19057 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19058 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19059 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19060
19061 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19062
19063 APInt CNV = Offset0;
19064 if (X0 < 0) CNV = -CNV;
19065 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19066 else CNV = CNV - Offset1;
19067
19068 SDLoc DL(OtherUses[i]);
19069
19070 // We can now generate the new expression.
19071 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19072 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19073
19074 SDValue NewUse = DAG.getNode(Opcode,
19075 DL,
19076 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
19077 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
19078 deleteAndRecombine(OtherUses[i]);
19079 }
19080
19081 // Replace the uses of Ptr with uses of the updated base value.
19082 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19083 deleteAndRecombine(Ptr.getNode());
19084 AddToWorklist(Result.getNode());
19085
19086 return true;
19087}
19088
19090 SDValue &BasePtr, SDValue &Offset,
19092 SelectionDAG &DAG,
19093 const TargetLowering &TLI) {
19094 if (PtrUse == N ||
19095 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19096 return false;
19097
19098 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19099 return false;
19100
19101 // Don't create an indexed load / store with zero offset.
19103 return false;
19104
19105 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19106 return false;
19107
19110 for (SDNode *User : BasePtr->users()) {
19111 if (User == Ptr.getNode())
19112 continue;
19113
19114 // Don't combine if there's a later user which could perform the indexing instead.
19115 if (isa<MemSDNode>(User)) {
19116 bool IsLoad = true;
19117 bool IsMasked = false;
19118 SDValue OtherPtr;
19120 IsMasked, OtherPtr, TLI)) {
19122 Worklist.push_back(User);
19123 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19124 return false;
19125 }
19126 }
19127
19128 // If all the uses are load / store addresses, then don't do the
19129 // transformation.
19130 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19131 for (SDNode *UserUser : User->users())
19132 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19133 return false;
19134 }
19135 }
19136 return true;
19137}
19138
19140 bool &IsMasked, SDValue &Ptr,
19141 SDValue &BasePtr, SDValue &Offset,
19143 SelectionDAG &DAG,
19144 const TargetLowering &TLI) {
19146 IsMasked, Ptr, TLI) ||
19147 Ptr->hasOneUse())
19148 return nullptr;
19149
19150 // Try turning it into a post-indexed load / store except when
19151 // 1) All uses are load / store ops that use it as base ptr (and
19152 // it may be folded into the addressing mode).
19153 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19154 // nor a successor of N. Otherwise, if Op is folded that would
19155 // create a cycle.
19157 for (SDNode *Op : Ptr->users()) {
19158 // Check for #1.
19159 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19160 continue;
19161
19162 // Check for #2.
19165 // Ptr is predecessor to both N and Op.
19166 Visited.insert(Ptr.getNode());
19167 Worklist.push_back(N);
19168 Worklist.push_back(Op);
19169 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19170 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19171 return Op;
19172 }
19173 return nullptr;
19174}
19175
19176 /// Try to combine a load/store with an add/sub of the base pointer node into a
19177 /// post-indexed load/store. The transformation effectively folds the add/subtract
19178 /// into the new indexed load/store, and all of its uses are redirected to the
19179 /// new load/store.
19180bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19181 if (Level < AfterLegalizeDAG)
19182 return false;
19183
19184 bool IsLoad = true;
19185 bool IsMasked = false;
19186 SDValue Ptr;
19190 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19191 Offset, AM, DAG, TLI);
19192 if (!Op)
19193 return false;
19194
19196 if (!IsMasked)
19197 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19198 Offset, AM)
19199 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19200 BasePtr, Offset, AM);
19201 else
19202 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19203 BasePtr, Offset, AM)
19205 BasePtr, Offset, AM);
19206 ++PostIndexedNodes;
19207 ++NodesCombined;
19208 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19209 Result.dump(&DAG); dbgs() << '\n');
19210 WorklistRemover DeadNodes(*this);
19211 if (IsLoad) {
19212 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19213 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19214 } else {
19215 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19216 }
19217
19218 // Finally, since the node is now dead, remove it from the graph.
19219 deleteAndRecombine(N);
19220
19221 // Replace the uses of Use with uses of the updated base value.
19223 Result.getValue(IsLoad ? 1 : 0));
19224 deleteAndRecombine(Op);
19225 return true;
19226}
19227
19228/// Return the base-pointer arithmetic from an indexed \p LD.
19229SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19230 ISD::MemIndexedMode AM = LD->getAddressingMode();
19231 assert(AM != ISD::UNINDEXED);
19232 SDValue BP = LD->getOperand(1);
19233 SDValue Inc = LD->getOperand(2);
19234
19235 // Some backends use TargetConstants for load offsets, but don't expect
19236 // TargetConstants in general ADD nodes. We can convert these constants into
19237 // regular Constants (if the constant is not opaque).
19239 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19240 "Cannot split out indexing using opaque target constants");
19241 if (Inc.getOpcode() == ISD::TargetConstant) {
19242 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19243 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19244 ConstInc->getValueType(0));
19245 }
19246
19247 unsigned Opc =
19248 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19249 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19250}
19251
19253 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19254}
19255
19256bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19257 EVT STType = Val.getValueType();
19258 EVT STMemType = ST->getMemoryVT();
19259 if (STType == STMemType)
19260 return true;
19261 if (isTypeLegal(STMemType))
19262 return false; // fail.
19263 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19264 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19265 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19266 return true;
19267 }
19268 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19269 STType.isInteger() && STMemType.isInteger()) {
19270 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19271 return true;
19272 }
19273 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19274 Val = DAG.getBitcast(STMemType, Val);
19275 return true;
19276 }
19277 return false; // fail.
19278}
19279
19280bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19281 EVT LDMemType = LD->getMemoryVT();
19282 EVT LDType = LD->getValueType(0);
19283 assert(Val.getValueType() == LDMemType &&
19284 "Attempting to extend value of non-matching type");
19285 if (LDType == LDMemType)
19286 return true;
19287 if (LDMemType.isInteger() && LDType.isInteger()) {
19288 switch (LD->getExtensionType()) {
19289 case ISD::NON_EXTLOAD:
19290 Val = DAG.getBitcast(LDType, Val);
19291 return true;
19292 case ISD::EXTLOAD:
19293 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19294 return true;
19295 case ISD::SEXTLOAD:
19296 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19297 return true;
19298 case ISD::ZEXTLOAD:
19299 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19300 return true;
19301 }
19302 }
19303 return false;
19304}
19305
19306StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
19307 int64_t &Offset) {
19308 SDValue Chain = LD->getOperand(0);
19309
19310 // Look through CALLSEQ_START.
19311 if (Chain.getOpcode() == ISD::CALLSEQ_START)
19312 Chain = Chain->getOperand(0);
19313
19314 StoreSDNode *ST = nullptr;
19316 if (Chain.getOpcode() == ISD::TokenFactor) {
19317 // Look for unique store within the TokenFactor.
19318 for (SDValue Op : Chain->ops()) {
19319 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
19320 if (!Store)
19321 continue;
19322 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19323 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19324 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19325 continue;
19326 // Make sure the store is not aliased with any nodes in TokenFactor.
19327 GatherAllAliases(Store, Chain, Aliases);
19328 if (Aliases.empty() ||
19329 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
19330 ST = Store;
19331 break;
19332 }
19333 } else {
19334 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
19335 if (Store) {
19336 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
19337 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
19338 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
19339 ST = Store;
19340 }
19341 }
19342
19343 return ST;
19344}
19345
19346SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
19347 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
19348 return SDValue();
19349 SDValue Chain = LD->getOperand(0);
19350 int64_t Offset;
19351
19352 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
19353 // TODO: Relax this restriction for unordered atomics (see D66309)
19354 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
19355 return SDValue();
19356
19357 EVT LDType = LD->getValueType(0);
19358 EVT LDMemType = LD->getMemoryVT();
19359 EVT STMemType = ST->getMemoryVT();
19360 EVT STType = ST->getValue().getValueType();
19361
19362 // There are two cases to consider here:
19363 // 1. The store is fixed width and the load is scalable. In this case we
19364 // don't know at compile time if the store completely envelops the load
19365 // so we abandon the optimisation.
19366 // 2. The store is scalable and the load is fixed width. We could
19367 // potentially support a limited number of cases here, but there has been
19368 // no cost-benefit analysis to prove it's worth it.
19369 bool LdStScalable = LDMemType.isScalableVT();
19370 if (LdStScalable != STMemType.isScalableVT())
19371 return SDValue();
19372
19373 // If we are dealing with scalable vectors on a big endian platform the
19374 // calculation of offsets below becomes trickier, since we do not know at
19375 // compile time the absolute size of the vector. Until we've done more
19376 // analysis on big-endian platforms it seems better to bail out for now.
19377 if (LdStScalable && DAG.getDataLayout().isBigEndian())
19378 return SDValue();
19379
19380 // Normalize for Endianness. After this Offset=0 will denote that the least
19381 // significant bit in the loaded value maps to the least significant bit in
19382 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
19383 // n:th least significant byte of the stored value.
19384 int64_t OrigOffset = Offset;
19385 if (DAG.getDataLayout().isBigEndian())
19386 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
19387 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
19388 8 -
19389 Offset;
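  // For example, an i8 load taken at byte offset 3 of an i32 store on a
  // big-endian target normalizes to Offset = (32 - 8) / 8 - 3 = 0, i.e. the
  // load reads the least significant byte of the stored value.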
19390
19391 // Check that the stored value covers all bits that are loaded.
19392 bool STCoversLD;
19393
19394 TypeSize LdMemSize = LDMemType.getSizeInBits();
19395 TypeSize StMemSize = STMemType.getSizeInBits();
19396 if (LdStScalable)
19397 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
19398 else
19399 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
19400 StMemSize.getFixedValue());
19401
19402 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
19403 if (LD->isIndexed()) {
19404 // Cannot handle opaque target constants and we must respect the user's
19405 // request not to split indexes from loads.
19406 if (!canSplitIdx(LD))
19407 return SDValue();
19408 SDValue Idx = SplitIndexingFromLoad(LD);
19409 SDValue Ops[] = {Val, Idx, Chain};
19410 return CombineTo(LD, Ops, 3);
19411 }
19412 return CombineTo(LD, Val, Chain);
19413 };
19414
19415 if (!STCoversLD)
19416 return SDValue();
19417
19418 // Memory as copy space (potentially masked).
19419 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
19420 // Simple case: Direct non-truncating forwarding
19421 if (LDType.getSizeInBits() == LdMemSize)
19422 return ReplaceLd(LD, ST->getValue(), Chain);
19423 // Can we model the truncate and extension with an and mask?
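    // For example, a truncating i32 store to an i16 slot feeding an i16
    // zextload back to i32 forwards as (and StoredVal, 0xFFFF).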
19424 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
19425 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
19426 // Mask to size of LDMemType
19427 auto Mask =
19429 StMemSize.getFixedValue()),
19430 SDLoc(ST), STType);
19431 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
19432 return ReplaceLd(LD, Val, Chain);
19433 }
19434 }
19435
19436 // Handle some cases for big-endian that would be Offset 0 and handled for
19437 // little-endian.
19438 SDValue Val = ST->getValue();
19439 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
19440 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
19441 !LDType.isVector() && isTypeLegal(STType) &&
19442 TLI.isOperationLegal(ISD::SRL, STType)) {
19443 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
19444 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
19445 Offset = 0;
19446 }
19447 }
19448
19449 // TODO: Deal with nonzero offset.
19450 if (LD->getBasePtr().isUndef() || Offset != 0)
19451 return SDValue();
19452 // Model necessary truncations / extensions.
19453 // Truncate the value to the stored memory size.
19454 do {
19455 if (!getTruncatedStoreValue(ST, Val))
19456 break;
19457 if (!isTypeLegal(LDMemType))
19458 break;
19459 if (STMemType != LDMemType) {
19460 // TODO: Support vectors? This requires extract_subvector/bitcast.
19461 if (!STMemType.isVector() && !LDMemType.isVector() &&
19462 STMemType.isInteger() && LDMemType.isInteger())
19463 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19464 else
19465 break;
19466 }
19467 if (!extendLoadedValueToExtension(LD, Val))
19468 break;
19469 return ReplaceLd(LD, Val, Chain);
19470 } while (false);
19471
19472 // On failure, cleanup dead nodes we may have created.
19473 if (Val->use_empty())
19474 deleteAndRecombine(Val.getNode());
19475 return SDValue();
19476}
19477
19478SDValue DAGCombiner::visitLOAD(SDNode *N) {
19479 LoadSDNode *LD = cast<LoadSDNode>(N);
19480 SDValue Chain = LD->getChain();
19481 SDValue Ptr = LD->getBasePtr();
19482
19483 // If load is not volatile and there are no uses of the loaded value (and
19484 // the updated indexed value in case of indexed loads), change uses of the
19485 // chain value into uses of the chain input (i.e. delete the dead load).
19486 // TODO: Allow this for unordered atomics (see D66309)
19487 if (LD->isSimple()) {
19488 if (N->getValueType(1) == MVT::Other) {
19489 // Unindexed loads.
19490 if (!N->hasAnyUseOfValue(0)) {
19491 // It's not safe to use the two-value CombineTo variant here, e.g.
19492 // v1, chain2 = load chain1, loc
19493 // v2, chain3 = load chain2, loc
19494 // v3 = add v2, c
19495 // Now we replace use of chain2 with chain1. This makes the second load
19496 // isomorphic to the one we are deleting, and thus makes this load live.
19497 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19498 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19499 dbgs() << "\n");
19500 WorklistRemover DeadNodes(*this);
19501 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19502 AddUsersToWorklist(Chain.getNode());
19503 if (N->use_empty())
19504 deleteAndRecombine(N);
19505
19506 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19507 }
19508 } else {
19509 // Indexed loads.
19510 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19511
19512 // If this load has an opaque TargetConstant offset, then we cannot split
19513 // the indexing into an add/sub directly (that TargetConstant may not be
19514 // valid for a different type of node, and we cannot convert an opaque
19515 // target constant into a regular constant).
19516 bool CanSplitIdx = canSplitIdx(LD);
19517
19518 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19519 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19520 SDValue Index;
19521 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19522 Index = SplitIndexingFromLoad(LD);
19523 // Try to fold the base pointer arithmetic into subsequent loads and
19524 // stores.
19525 AddUsersToWorklist(N);
19526 } else
19527 Index = DAG.getUNDEF(N->getValueType(1));
19528 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19529 dbgs() << "\nWith: "; Undef.dump(&DAG);
19530 dbgs() << " and 2 other values\n");
19531 WorklistRemover DeadNodes(*this);
19532 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19533 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19534 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19535 deleteAndRecombine(N);
19536 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19537 }
19538 }
19539 }
19540
19541 // If this load is directly stored, replace the load value with the stored
19542 // value.
19543 if (auto V = ForwardStoreValueToDirectLoad(LD))
19544 return V;
19545
19546 // Try to infer better alignment information than the load already has.
19547 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19548 !LD->isAtomic()) {
19549 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19550 if (*Alignment > LD->getAlign() &&
19551 isAligned(*Alignment, LD->getSrcValueOffset())) {
19552 SDValue NewLoad = DAG.getExtLoad(
19553 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19554 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19555 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19556 // NewLoad will always be N as we are only refining the alignment
19557 assert(NewLoad.getNode() == N);
19558 (void)NewLoad;
19559 }
19560 }
19561 }
19562
19563 if (LD->isUnindexed()) {
19564 // Walk up chain skipping non-aliasing memory nodes.
19565 SDValue BetterChain = FindBetterChain(LD, Chain);
19566
19567 // If there is a better chain.
19568 if (Chain != BetterChain) {
19569 SDValue ReplLoad;
19570
19571 // Replace the chain to avoid dependency.
19572 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19573 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19574 BetterChain, Ptr, LD->getMemOperand());
19575 } else {
19576 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19577 LD->getValueType(0),
19578 BetterChain, Ptr, LD->getMemoryVT(),
19579 LD->getMemOperand());
19580 }
19581
19582 // Create token factor to keep old chain connected.
19583 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19584 MVT::Other, Chain, ReplLoad.getValue(1));
19585
19586 // Replace uses with load result and token factor
19587 return CombineTo(N, ReplLoad.getValue(0), Token);
19588 }
19589 }
19590
19591 // Try transforming N to an indexed load.
19592 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19593 return SDValue(N, 0);
19594
19595 // Try to slice up N to more direct loads if the slices are mapped to
19596 // different register banks or pairing can take place.
19597 if (SliceUpLoad(N))
19598 return SDValue(N, 0);
19599
19600 return SDValue();
19601}
19602
19603namespace {
19604
19605 /// Helper structure used to slice a load into smaller loads.
19606/// Basically a slice is obtained from the following sequence:
19607/// Origin = load Ty1, Base
19608/// Shift = srl Ty1 Origin, CstTy Amount
19609/// Inst = trunc Shift to Ty2
19610///
19611/// Then, it will be rewritten into:
19612/// Slice = load SliceTy, Base + SliceOffset
19613/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19614///
19615/// SliceTy is deduced from the number of bits that are actually used to
19616/// build Inst.
19617struct LoadedSlice {
19618 /// Helper structure used to compute the cost of a slice.
19619 struct Cost {
19620 /// Are we optimizing for code size.
19621 bool ForCodeSize = false;
19622
19623 /// Various cost.
19624 unsigned Loads = 0;
19625 unsigned Truncates = 0;
19626 unsigned CrossRegisterBanksCopies = 0;
19627 unsigned ZExts = 0;
19628 unsigned Shift = 0;
19629
19630 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19631
19632 /// Get the cost of one isolated slice.
19633 Cost(const LoadedSlice &LS, bool ForCodeSize)
19634 : ForCodeSize(ForCodeSize), Loads(1) {
19635 EVT TruncType = LS.Inst->getValueType(0);
19636 EVT LoadedType = LS.getLoadedType();
19637 if (TruncType != LoadedType &&
19638 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19639 ZExts = 1;
19640 }
19641
19642 /// Account for slicing gain in the current cost.
19643 /// Slicing provides a few gains, like removing a shift or a
19644 /// truncate. This method adds the gain from this slice to the cost of
19645 /// the original load.
19646 void addSliceGain(const LoadedSlice &LS) {
19647 // Each slice saves a truncate.
19648 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19649 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19650 ++Truncates;
19651 // If there is a shift amount, this slice gets rid of it.
19652 if (LS.Shift)
19653 ++Shift;
19654 // If this slice can merge a cross register bank copy, account for it.
19655 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19656 ++CrossRegisterBanksCopies;
19657 }
19658
19659 Cost &operator+=(const Cost &RHS) {
19660 Loads += RHS.Loads;
19661 Truncates += RHS.Truncates;
19662 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19663 ZExts += RHS.ZExts;
19664 Shift += RHS.Shift;
19665 return *this;
19666 }
19667
19668 bool operator==(const Cost &RHS) const {
19669 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19670 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19671 ZExts == RHS.ZExts && Shift == RHS.Shift;
19672 }
19673
19674 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19675
19676 bool operator<(const Cost &RHS) const {
19677 // Assume cross register banks copies are as expensive as loads.
19678 // FIXME: Do we want some more target hooks?
19679 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19680 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19681 // Unless we are optimizing for code size, consider the
19682 // expensive operation first.
19683 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19684 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19685 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19686 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19687 }
19688
19689 bool operator>(const Cost &RHS) const { return RHS < *this; }
19690
19691 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19692
19693 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19694 };
19695
19696 // The last instruction that represents the slice. This should be a
19697 // truncate instruction.
19698 SDNode *Inst;
19699
19700 // The original load instruction.
19701 LoadSDNode *Origin;
19702
19703 // The right shift amount in bits from the original load.
19704 unsigned Shift;
19705
19706 // The DAG from which Origin came.
19707 // This is used to get some contextual information about legal types, etc.
19708 SelectionDAG *DAG;
19709
19710 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19711 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19712 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19713
19714 /// Get the bits used in a chunk of bits \p BitWidth large.
19715 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19716 /// unused bits set to 0.
19717 APInt getUsedBits() const {
19718 // Reproduce the trunc(lshr) sequence:
19719 // - Start from the truncated value.
19720 // - Zero extend to the desired bit width.
19721 // - Shift left.
19722 assert(Origin && "No original load to compare against.");
19723 unsigned BitWidth = Origin->getValueSizeInBits(0);
19724 assert(Inst && "This slice is not bound to an instruction");
19725 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19726 "Extracted slice is bigger than the whole type!");
19727 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19728 UsedBits.setAllBits();
19729 UsedBits = UsedBits.zext(BitWidth);
19730 UsedBits <<= Shift;
19731 return UsedBits;
19732 }
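  // A hypothetical example (values chosen for exposition): with
  //   Origin = load i32, Base
  //   Inst   = trunc (srl Origin, 16) to i8     ; Shift == 16
  // the mask is built as APInt(8, all ones) -> zext to 32 -> shl 16, giving
  // 0x00FF0000, i.e. only bits [16, 24) of the loaded value are used.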
19733
19734 /// Get the size of the slice to be loaded in bytes.
19735 unsigned getLoadedSize() const {
19736 unsigned SliceSize = getUsedBits().popcount();
19737 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19738 return SliceSize / 8;
19739 }
19740
19741 /// Get the type that will be loaded for this slice.
19742 /// Note: This may not be the final type for the slice.
19743 EVT getLoadedType() const {
19744 assert(DAG && "Missing context");
19745 LLVMContext &Ctxt = *DAG->getContext();
19746 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19747 }
19748
19749 /// Get the alignment of the load used for this slice.
19750 Align getAlign() const {
19751 Align Alignment = Origin->getAlign();
19752 uint64_t Offset = getOffsetFromBase();
19753 if (Offset != 0)
19754 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19755 return Alignment;
19756 }
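  // A sketch of the computation above, assuming a 4-byte aligned origin and a
  // slice at byte offset 2: commonAlignment(Align(4), 4 + 2) is Align(2), so
  // the slice load can only be assumed to be 2-byte aligned.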
19757
19758 /// Check if this slice can be rewritten with legal operations.
19759 bool isLegal() const {
19760 // An invalid slice is not legal.
19761 if (!Origin || !Inst || !DAG)
19762 return false;
19763
19764 // Offsets are for indexed loads only; we do not handle that.
19765 if (!Origin->getOffset().isUndef())
19766 return false;
19767
19768 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19769
19770 // Check that the type is legal.
19771 EVT SliceType = getLoadedType();
19772 if (!TLI.isTypeLegal(SliceType))
19773 return false;
19774
19775 // Check that the load is legal for this type.
19776 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19777 return false;
19778
19779 // Check that the offset can be computed.
19780 // 1. Check its type.
19781 EVT PtrType = Origin->getBasePtr().getValueType();
19782 if (PtrType == MVT::Untyped || PtrType.isExtended())
19783 return false;
19784
19785 // 2. Check that it fits in the immediate.
19786 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19787 return false;
19788
19789 // 3. Check that the computation is legal.
19790 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19791 return false;
19792
19793 // Check that the zext is legal if it needs one.
19794 EVT TruncateType = Inst->getValueType(0);
19795 if (TruncateType != SliceType &&
19796 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19797 return false;
19798
19799 return true;
19800 }
19801
19802 /// Get the offset in bytes of this slice in the original chunk of
19803 /// bits.
19804 /// \pre DAG != nullptr.
19805 uint64_t getOffsetFromBase() const {
19806 assert(DAG && "Missing context.");
19807 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19808 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19809 uint64_t Offset = Shift / 8;
19810 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19811 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19812 "The size of the original loaded type is not a multiple of a"
19813 " byte.");
19814 // If Offset is bigger than TySizeInBytes, it means we are loading all
19815 // zeros. This should have been optimized before in the process.
19816 assert(TySizeInBytes > Offset &&
19817 "Invalid shift amount for given loaded size");
19818 if (IsBigEndian)
19819 Offset = TySizeInBytes - Offset - getLoadedSize();
19820 return Offset;
19821 }
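  // Worked example (hypothetical): an i8 slice of an i32 load with Shift == 16
  // lives at byte offset 2 on a little-endian target, and at byte offset
  // 4 - 2 - 1 == 1 on a big-endian target.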
19822
19823 /// Generate the sequence of instructions to load the slice
19824 /// represented by this object and redirect the uses of this slice to
19825 /// this new sequence of instructions.
19826 /// \pre this->Inst && this->Origin are valid Instructions and this
19827 /// object passed the legal check: LoadedSlice::isLegal returned true.
19828 /// \return The last instruction of the sequence used to load the slice.
19829 SDValue loadSlice() const {
19830 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19831 const SDValue &OldBaseAddr = Origin->getBasePtr();
19832 SDValue BaseAddr = OldBaseAddr;
19833 // Get the offset in that chunk of bytes w.r.t. the endianness.
19834 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19835 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19836 if (Offset) {
19837 // BaseAddr = BaseAddr + Offset.
19838 EVT ArithType = BaseAddr.getValueType();
19839 SDLoc DL(Origin);
19840 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19841 DAG->getConstant(Offset, DL, ArithType));
19842 }
19843
19844 // Create the type of the loaded slice according to its size.
19845 EVT SliceType = getLoadedType();
19846
19847 // Create the load for the slice.
19848 SDValue LastInst =
19849 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19850 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19851 Origin->getMemOperand()->getFlags());
19852 // If the final type is not the same as the loaded type, this means that
19853 // we have to pad with zero. Create a zero extend for that.
19854 EVT FinalType = Inst->getValueType(0);
19855 if (SliceType != FinalType)
19856 LastInst =
19857 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19858 return LastInst;
19859 }
19860
19861 /// Check if this slice can be merged with an expensive cross register
19862 /// bank copy. E.g.,
19863 /// i = load i32
19864 /// f = bitcast i32 i to float
19865 bool canMergeExpensiveCrossRegisterBankCopy() const {
19866 if (!Inst || !Inst->hasOneUse())
19867 return false;
19868 SDNode *User = *Inst->user_begin();
19869 if (User->getOpcode() != ISD::BITCAST)
19870 return false;
19871 assert(DAG && "Missing context");
19872 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19873 EVT ResVT = User->getValueType(0);
19874 const TargetRegisterClass *ResRC =
19875 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
19876 const TargetRegisterClass *ArgRC =
19877 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
19878 User->getOperand(0)->isDivergent());
19879 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19880 return false;
19881
19882 // At this point, we know that we perform a cross-register-bank copy.
19883 // Check if it is expensive.
19884 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19885 // Assume bitcasts are cheap, unless both register classes do not
19886 // explicitly share a common sub class.
19887 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19888 return false;
19889
19890 // Check if it will be merged with the load.
19891 // 1. Check the alignment / fast memory access constraint.
19892 unsigned IsFast = 0;
19893 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19894 Origin->getAddressSpace(), getAlign(),
19895 Origin->getMemOperand()->getFlags(), &IsFast) ||
19896 !IsFast)
19897 return false;
19898
19899 // 2. Check that the load is a legal operation for that type.
19900 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19901 return false;
19902
19903 // 3. Check that we do not have a zext in the way.
19904 if (Inst->getValueType(0) != getLoadedType())
19905 return false;
19906
19907 return true;
19908 }
19909};
19910
19911} // end anonymous namespace
19912
19913/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19914/// \p UsedBits looks like 0..0 1..1 0..0.
19915static bool areUsedBitsDense(const APInt &UsedBits) {
19916 // If all the bits are one, this is dense!
19917 if (UsedBits.isAllOnes())
19918 return true;
19919
19920 // Get rid of the unused bits on the right.
19921 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19922 // Get rid of the unused bits on the left.
19923 if (NarrowedUsedBits.countl_zero())
19924 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19925 // Check that the chunk of bits is completely used.
19926 return NarrowedUsedBits.isAllOnes();
19927}
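// For illustration (hypothetical masks): 0x00FF0000 is dense (a single run of
// ones, so the used bits can be loaded as one byte-aligned slice), while
// 0x00FF00FF is not dense (two separate runs) and is rejected.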
19928
19929/// Check whether or not \p First and \p Second are next to each other
19930/// in memory. This means that there is no hole between the bits loaded
19931/// by \p First and the bits loaded by \p Second.
19932static bool areSlicesNextToEachOther(const LoadedSlice &First,
19933 const LoadedSlice &Second) {
19934 assert(First.Origin == Second.Origin && First.Origin &&
19935 "Unable to match different memory origins.");
19936 APInt UsedBits = First.getUsedBits();
19937 assert((UsedBits & Second.getUsedBits()) == 0 &&
19938 "Slices are not supposed to overlap.");
19939 UsedBits |= Second.getUsedBits();
19940 return areUsedBitsDense(UsedBits);
19941}
19942
19943/// Adjust the \p GlobalLSCost according to the target
19944 /// pairing capabilities and the layout of the slices.
19945 /// \pre \p GlobalLSCost should account for at least as many loads as
19946 /// there are in the slices in \p LoadedSlices.
19947 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19948 LoadedSlice::Cost &GlobalLSCost) {
19949 unsigned NumberOfSlices = LoadedSlices.size();
19950 // If there are fewer than 2 elements, no pairing is possible.
19951 if (NumberOfSlices < 2)
19952 return;
19953
19954 // Sort the slices so that elements that are likely to be next to each
19955 // other in memory are next to each other in the list.
19956 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19957 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19958 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19959 });
19960 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19961 // First (resp. Second) is the first (resp. second) potential candidate
19962 // to be placed in a paired load.
19963 const LoadedSlice *First = nullptr;
19964 const LoadedSlice *Second = nullptr;
19965 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19966 // Set the beginning of the pair.
19967 First = Second) {
19968 Second = &LoadedSlices[CurrSlice];
19969
19970 // If First is NULL, it means we start a new pair.
19971 // Get to the next slice.
19972 if (!First)
19973 continue;
19974
19975 EVT LoadedType = First->getLoadedType();
19976
19977 // If the types of the slices are different, we cannot pair them.
19978 if (LoadedType != Second->getLoadedType())
19979 continue;
19980
19981 // Check if the target supplies paired loads for this type.
19982 Align RequiredAlignment;
19983 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19984 // Move to the next pair; this type is hopeless.
19985 Second = nullptr;
19986 continue;
19987 }
19988 // Check if we meet the alignment requirement.
19989 if (First->getAlign() < RequiredAlignment)
19990 continue;
19991
19992 // Check that both loads are next to each other in memory.
19993 if (!areSlicesNextToEachOther(*First, *Second))
19994 continue;
19995
19996 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19997 --GlobalLSCost.Loads;
19998 // Move to the next pair.
19999 Second = nullptr;
20000 }
20001}
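// A sketch of the adjustment above, assuming a target whose hasPairedLoad
// returns true for i16 with the slices' alignment: two adjacent, same-typed
// i16 slices at offsets 0 and 2 count as one paired load, so
// GlobalLSCost.Loads drops from 2 to 1 and slicing becomes easier to justify.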
20002
20003/// Check the profitability of all involved LoadedSlice.
20004 /// Currently, it is considered profitable if there are exactly two
20005/// involved slices (1) which are (2) next to each other in memory, and
20006/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20007///
20008/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20009/// the elements themselves.
20010///
20011 /// FIXME: When the cost model is mature enough, we can relax
20012 /// constraints (1) and (2).
20013 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20014 const APInt &UsedBits, bool ForCodeSize) {
20015 unsigned NumberOfSlices = LoadedSlices.size();
20016 if (StressLoadSlicing)
20017 return NumberOfSlices > 1;
20018
20019 // Check (1).
20020 if (NumberOfSlices != 2)
20021 return false;
20022
20023 // Check (2).
20024 if (!areUsedBitsDense(UsedBits))
20025 return false;
20026
20027 // Check (3).
20028 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20029 // The original code has one big load.
20030 OrigCost.Loads = 1;
20031 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20032 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20033 // Accumulate the cost of all the slices.
20034 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20035 GlobalSlicingCost += SliceCost;
20036
20037 // Add to the cost of the original configuration the gain obtained with
20038 // the current slices.
20039 OrigCost.addSliceGain(LS);
20040 }
20041
20042 // If the target supports paired loads, adjust the cost accordingly.
20043 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20044 return OrigCost > GlobalSlicingCost;
20045}
20046
20047/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20048/// operations, split it in the various pieces being extracted.
20049///
20050/// This sort of thing is introduced by SROA.
20051/// This slicing takes care not to insert overlapping loads.
20052/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20053bool DAGCombiner::SliceUpLoad(SDNode *N) {
20054 if (Level < AfterLegalizeDAG)
20055 return false;
20056
20057 LoadSDNode *LD = cast<LoadSDNode>(N);
20058 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20059 !LD->getValueType(0).isInteger())
20060 return false;
20061
20062 // The algorithm to split up a load of a scalable vector into individual
20063 // elements currently requires knowing the length of the loaded type,
20064 // so will need adjusting to work on scalable vectors.
20065 if (LD->getValueType(0).isScalableVector())
20066 return false;
20067
20068 // Keep track of already used bits to detect overlapping values.
20069 // In that case, we will just abort the transformation.
20070 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20071
20072 SmallVector<LoadedSlice, 4> LoadedSlices;
20073
20074 // Check if this load is used as several smaller chunks of bits.
20075 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20076 // of computation for each trunc.
20077 for (SDUse &U : LD->uses()) {
20078 // Skip the uses of the chain.
20079 if (U.getResNo() != 0)
20080 continue;
20081
20082 SDNode *User = U.getUser();
20083 unsigned Shift = 0;
20084
20085 // Check if this is a trunc(lshr).
20086 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20087 isa<ConstantSDNode>(User->getOperand(1))) {
20088 Shift = User->getConstantOperandVal(1);
20089 User = *User->user_begin();
20090 }
20091
20092 // At this point, User is a truncate iff we encountered trunc or
20093 // trunc(lshr).
20094 if (User->getOpcode() != ISD::TRUNCATE)
20095 return false;
20096
20097 // The width of the type must be a power of 2 and at least 8 bits.
20098 // Otherwise the load cannot be represented in LLVM IR.
20099 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
20100 // the slice would straddle byte boundaries. We do not support that.
20101 unsigned Width = User->getValueSizeInBits(0);
20102 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20103 return false;
20104
20105 // Build the slice for this chain of computations.
20106 LoadedSlice LS(User, LD, Shift, &DAG);
20107 APInt CurrentUsedBits = LS.getUsedBits();
20108
20109 // Check if this slice overlaps with another.
20110 if ((CurrentUsedBits & UsedBits) != 0)
20111 return false;
20112 // Update the bits used globally.
20113 UsedBits |= CurrentUsedBits;
20114
20115 // Check if the new slice would be legal.
20116 if (!LS.isLegal())
20117 return false;
20118
20119 // Record the slice.
20120 LoadedSlices.push_back(LS);
20121 }
20122
20123 // Abort slicing if it does not seem to be profitable.
20124 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20125 return false;
20126
20127 ++SlicedLoads;
20128
20129 // Rewrite each chain to use an independent load.
20130 // By construction, each chain can be represented by a unique load.
20131
20132 // Prepare the argument for the new token factor for all the slices.
20133 SmallVector<SDValue, 8> ArgChains;
20134 for (const LoadedSlice &LS : LoadedSlices) {
20135 SDValue SliceInst = LS.loadSlice();
20136 CombineTo(LS.Inst, SliceInst, true);
20137 if (SliceInst.getOpcode() != ISD::LOAD)
20138 SliceInst = SliceInst.getOperand(0);
20139 assert(SliceInst->getOpcode() == ISD::LOAD &&
20140 "It takes more than a zext to get to the loaded slice!!");
20141 ArgChains.push_back(SliceInst.getValue(1));
20142 }
20143
20144 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20145 ArgChains);
20146 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20147 AddToWorklist(Chain.getNode());
20148 return true;
20149}
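// End-to-end illustration of the slicing above (hypothetical, little-endian,
// assuming i16 loads are legal and the cost model accepts the split):
//   Wide = load i32, Base
//   Lo   = trunc Wide to i16
//   Hi   = trunc (srl Wide, 16) to i16
// is rewritten as
//   Lo   = load i16, Base
//   Hi   = load i16, Base + 2
// and a TokenFactor of the two new chains replaces the original chain result.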
20150
20151 /// Check to see if V is (and load (ptr), imm), i.e. a load whose value has
20152 /// specific bytes cleared out by the mask. If so, return the number of bytes
20153 /// being masked out and the shift amount (in bytes).
20154static std::pair<unsigned, unsigned>
20156 std::pair<unsigned, unsigned> Result(0, 0);
20157
20158 // Check for the structure we're looking for.
20159 if (V->getOpcode() != ISD::AND ||
20160 !isa<ConstantSDNode>(V->getOperand(1)) ||
20161 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20162 return Result;
20163
20164 // Check the chain and pointer.
20165 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20166 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20167
20168 // This only handles simple types.
20169 if (V.getValueType() != MVT::i16 &&
20170 V.getValueType() != MVT::i32 &&
20171 V.getValueType() != MVT::i64)
20172 return Result;
20173
20174 // Check the constant mask. Invert it so that the bits being masked out are
20175 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
20176 // follow the sign bit for uniformity.
20177 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20178 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20179 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20180 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20181 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20182 if (NotMaskLZ == 64) return Result; // All zero mask.
20183
20184 // See if we have a continuous run of bits. If so, we have 0*1+0*
20185 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20186 return Result;
20187
20188 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20189 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20190 NotMaskLZ -= 64-V.getValueSizeInBits();
20191
20192 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20193 switch (MaskedBytes) {
20194 case 1:
20195 case 2:
20196 case 4: break;
20197 default: return Result; // All one mask, or 5-byte mask.
20198 }
20199
20200 // Verify that the masked region starts at a byte offset that is a multiple
20201 // of the mask width, so the narrowed access is aligned to its own width.
20202 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20203
20204 // For narrowing to be valid, it must be the case that the load is the
20205 // memory operation immediately preceding the store.
20206 if (LD == Chain.getNode())
20207 ; // ok.
20208 else if (Chain->getOpcode() == ISD::TokenFactor &&
20209 SDValue(LD, 1).hasOneUse()) {
20210 // LD has only 1 chain use, so there are no indirect dependencies.
20211 if (!LD->isOperandOf(Chain.getNode()))
20212 return Result;
20213 } else
20214 return Result; // Fail.
20215
20216 Result.first = MaskedBytes;
20217 Result.second = NotMaskTZ/8;
20218 return Result;
20219}
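// Worked example of the pattern above (hypothetical constants): for
// V = (and (load i32 Ptr), 0xFFFF00FF) the inverted mask is 0x0000FF00, a
// single byte-aligned run, so the helper returns {1, 1}: one byte is masked
// out, starting at byte offset 1 from Ptr.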
20220
20221/// Check to see if IVal is something that provides a value as specified by
20222/// MaskInfo. If so, replace the specified store with a narrower store of
20223/// truncated IVal.
20224static SDValue
20225ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20226 SDValue IVal, StoreSDNode *St,
20227 DAGCombiner *DC) {
20228 unsigned NumBytes = MaskInfo.first;
20229 unsigned ByteShift = MaskInfo.second;
20230 SelectionDAG &DAG = DC->getDAG();
20231
20232 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20233 // that uses this. If not, this is not a replacement.
20234 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20235 ByteShift*8, (ByteShift+NumBytes)*8);
20236 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20237
20238 // Check that it is legal on the target to do this. It is legal if the new
20239 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20240 // legalization. If the source type is legal, but the store type isn't, see
20241 // if we can use a truncating store.
20242 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20243 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20244 bool UseTruncStore;
20245 if (DC->isTypeLegal(VT))
20246 UseTruncStore = false;
20247 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20248 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20249 UseTruncStore = true;
20250 else
20251 return SDValue();
20252
20253 // Can't do this for indexed stores.
20254 if (St->isIndexed())
20255 return SDValue();
20256
20257 // Check that the target doesn't think this is a bad idea.
20258 if (St->getMemOperand() &&
20259 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20260 *St->getMemOperand()))
20261 return SDValue();
20262
20263 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
20264 // shifted by ByteShift and truncated down to NumBytes.
20265 if (ByteShift) {
20266 SDLoc DL(IVal);
20267 IVal = DAG.getNode(
20268 ISD::SRL, DL, IVal.getValueType(), IVal,
20269 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20270 }
20271
20272 // Figure out the offset for the store and the alignment of the access.
20273 unsigned StOffset;
20274 if (DAG.getDataLayout().isLittleEndian())
20275 StOffset = ByteShift;
20276 else
20277 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20278
20279 SDValue Ptr = St->getBasePtr();
20280 if (StOffset) {
20281 SDLoc DL(IVal);
20282 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20283 }
20284
20285 ++OpsNarrowed;
20286 if (UseTruncStore)
20287 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20288 St->getPointerInfo().getWithOffset(StOffset),
20289 VT, St->getOriginalAlign());
20290
20291 // Truncate down to the new size.
20292 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20293
20294 return DAG
20295 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20296 St->getPointerInfo().getWithOffset(StOffset),
20297 St->getOriginalAlign());
20298}
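// Putting the two helpers together (hypothetical, little-endian, assuming an
// i8 store is legal and allowed by the target): for
//   store (or (and (load i32 Ptr), 0xFFFF00FF), IVal), Ptr
// where IVal is known to be zero outside bits [8, 16), MaskInfo is {1, 1} and
// the whole sequence can be replaced by
//   store i8 (trunc (srl IVal, 8)), Ptr + 1
// which makes the original load and 'or' dead.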
20299
20300 /// Look for a sequence of load / op / store where op is one of 'or', 'xor', or
20301 /// 'and' with an immediate. If 'op' only touches some of the loaded bits, try
20302/// narrowing the load and store if it would end up being a win for performance
20303/// or code size.
20304SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20305 StoreSDNode *ST = cast<StoreSDNode>(N);
20306 if (!ST->isSimple())
20307 return SDValue();
20308
20309 SDValue Chain = ST->getChain();
20310 SDValue Value = ST->getValue();
20311 SDValue Ptr = ST->getBasePtr();
20312 EVT VT = Value.getValueType();
20313
20314 if (ST->isTruncatingStore() || VT.isVector())
20315 return SDValue();
20316
20317 unsigned Opc = Value.getOpcode();
20318
20319 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
20320 !Value.hasOneUse())
20321 return SDValue();
20322
20323 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
20324 // is a byte mask indicating a consecutive number of bytes, check to see if
20325 // Y is known to provide just those bytes. If so, we try to replace the
20326 // load + replace + store sequence with a single (narrower) store, which makes
20327 // the load dead.
20329 std::pair<unsigned, unsigned> MaskedLoad;
20330 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
20331 if (MaskedLoad.first)
20332 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20333 Value.getOperand(1), ST, this))
20334 return NewST;
20335
20336 // Or is commutative, so try swapping X and Y.
20337 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
20338 if (MaskedLoad.first)
20339 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
20340 Value.getOperand(0), ST, this))
20341 return NewST;
20342 }
20343
20345 return SDValue();
20346
20347 if (Value.getOperand(1).getOpcode() != ISD::Constant)
20348 return SDValue();
20349
20350 SDValue N0 = Value.getOperand(0);
20351 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
20352 Chain == SDValue(N0.getNode(), 1)) {
20353 LoadSDNode *LD = cast<LoadSDNode>(N0);
20354 if (LD->getBasePtr() != Ptr ||
20355 LD->getPointerInfo().getAddrSpace() !=
20356 ST->getPointerInfo().getAddrSpace())
20357 return SDValue();
20358
20359 // Find the type NewVT to narrow the load / op / store to.
20360 SDValue N1 = Value.getOperand(1);
20361 unsigned BitWidth = N1.getValueSizeInBits();
20362 APInt Imm = N1->getAsAPIntVal();
20363 if (Opc == ISD::AND)
20364 Imm.flipAllBits();
20365 if (Imm == 0 || Imm.isAllOnes())
20366 return SDValue();
20367 // Find the least/most significant bits that need to be part of the narrowed
20368 // operation. We assume the target will need to address/access full bytes, so
20369 // we make sure to align LSB and MSB at byte boundaries.
20370 unsigned BitsPerByteMask = 7u;
20371 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
20372 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
20373 unsigned NewBW = NextPowerOf2(MSB - LSB);
20374 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20375 // The narrowing should be profitable, the load/store operation should be
20376 // legal (or custom) and the store size should be equal to the NewVT width.
20377 while (NewBW < BitWidth &&
20378 (NewVT.getStoreSizeInBits() != NewBW ||
20379 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
20381 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
20382 NewBW = NextPowerOf2(NewBW);
20383 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
20384 }
20385 if (NewBW >= BitWidth)
20386 return SDValue();
20387
20388 // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
20389 // large enough to cover all bits that should be modified. This type might
20390 // however be larger than really needed (such as i32 while we actually only
20391 // need to modify one byte). Now we need to find out how to align the memory
20392 // accesses to satisfy preferred alignments as well as avoid accessing
20393 // memory outside the store size of the original access.
20394
20395 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
20396
20397 // Let ShAmt denote the number of bits to skip, counted from the least
20398 // significant bits of Imm, and let PtrOff denote how much the pointer needs
20399 // to be offset (in bytes) for the new access.
20400 unsigned ShAmt = 0;
20401 uint64_t PtrOff = 0;
20402 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
20403 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
20404 if (ShAmt > LSB)
20405 return SDValue();
20406 if (ShAmt + NewBW < MSB)
20407 continue;
20408
20409 // Calculate PtrOff.
20410 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
20411 ? VTStoreSize - NewBW - ShAmt
20412 : ShAmt;
20413 PtrOff = PtrAdjustmentInBits / 8;
20414
20415 // Now check if narrow access is allowed and fast, considering alignments.
20416 unsigned IsFast = 0;
20417 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20418 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
20419 LD->getAddressSpace(), NewAlign,
20420 LD->getMemOperand()->getFlags(), &IsFast) &&
20421 IsFast)
20422 break;
20423 }
20424 // If the loop above did not find an accepted ShAmt, we need to exit here.
20425 if (ShAmt + NewBW > VTStoreSize)
20426 return SDValue();
20427
20428 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
20429 if (Opc == ISD::AND)
20430 NewImm.flipAllBits();
20431 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
20432 SDValue NewPtr =
20433 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
20434 SDValue NewLD =
20435 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
20436 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20437 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20438 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
20439 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
20440 SDValue NewST =
20441 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
20442 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20443
20444 AddToWorklist(NewPtr.getNode());
20445 AddToWorklist(NewLD.getNode());
20446 AddToWorklist(NewVal.getNode());
20447 WorklistRemover DeadNodes(*this);
20448 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
20449 ++OpsNarrowed;
20450 return NewST;
20451 }
20452
20453 return SDValue();
20454}
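// Worked example of the narrowing above (hypothetical, little-endian, assuming
// an i8 'or' and the narrow memory access are legal and fast): for
//   store (or (load i32 Ptr), 0x00FF0000), Ptr
// LSB = 16 and MSB = 23, so NewBW = 8; the loop settles on ShAmt = 16, i.e.
// PtrOff = 2, and the sequence becomes
//   store (or (load i8 Ptr+2), 0xFF), Ptr+2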
20455
20456/// For a given floating point load / store pair, if the load value isn't used
20457/// by any other operations, then consider transforming the pair to integer
20458/// load / store operations if the target deems the transformation profitable.
20459SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
20460 StoreSDNode *ST = cast<StoreSDNode>(N);
20461 SDValue Value = ST->getValue();
20462 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
20463 Value.hasOneUse()) {
20464 LoadSDNode *LD = cast<LoadSDNode>(Value);
20465 EVT VT = LD->getMemoryVT();
20466 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
20467 LD->isNonTemporal() || ST->isNonTemporal() ||
20468 LD->getPointerInfo().getAddrSpace() != 0 ||
20469 ST->getPointerInfo().getAddrSpace() != 0)
20470 return SDValue();
20471
20472 TypeSize VTSize = VT.getSizeInBits();
20473
20474 // We don't know the size of scalable types at compile time so we cannot
20475 // create an integer of the equivalent size.
20476 if (VTSize.isScalable())
20477 return SDValue();
20478
20479 unsigned FastLD = 0, FastST = 0;
20480 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20481 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20482 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20483 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20484 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20485 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20486 *LD->getMemOperand(), &FastLD) ||
20487 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20488 *ST->getMemOperand(), &FastST) ||
20489 !FastLD || !FastST)
20490 return SDValue();
20491
20492 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
20493 LD->getBasePtr(), LD->getMemOperand());
20494
20495 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
20496 ST->getBasePtr(), ST->getMemOperand());
20497
20498 AddToWorklist(NewLD.getNode());
20499 AddToWorklist(NewST.getNode());
20500 WorklistRemover DeadNodes(*this);
20501 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20502 ++LdStFP2Int;
20503 return NewST;
20504 }
20505
20506 return SDValue();
20507}
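// For illustration (assuming the target reports i32 loads and stores as legal
// and fast here): a pair such as
//   store f32 (load f32 Ptr1), Ptr2
// where the loaded value has no other users becomes
//   store i32 (load i32 Ptr1), Ptr2
// avoiding a round trip through the floating-point register file.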
20508
20509// This is a helper function for visitMUL to check the profitability
20510// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20511// MulNode is the original multiply, AddNode is (add x, c1),
20512// and ConstNode is c2.
20513//
20514// If the (add x, c1) has multiple uses, we could increase
20515// the number of adds if we make this transformation.
20516// It would only be worth doing this if we can remove a
20517// multiply in the process. Check for that here.
20518// To illustrate:
20519// (A + c1) * c3
20520// (A + c2) * c3
20521// We're checking for cases where we have common "c3 * A" expressions.
20522bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20523 SDValue ConstNode) {
20524 APInt Val;
20525
20526 // If the add only has one use, and the target thinks the folding is
20527 // profitable or does not lead to worse code, this would be OK to do.
20528 if (AddNode->hasOneUse() &&
20529 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20530 return true;
20531
20532 // Walk all the users of the constant with which we're multiplying.
20533 for (SDNode *User : ConstNode->users()) {
20534 if (User == MulNode) // This use is the one we're on right now. Skip it.
20535 continue;
20536
20537 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
20538 SDNode *OtherOp;
20539 SDNode *MulVar = AddNode.getOperand(0).getNode();
20540
20541 // OtherOp is what we're multiplying against the constant.
20542 if (User->getOperand(0) == ConstNode)
20543 OtherOp = User->getOperand(1).getNode();
20544 else
20545 OtherOp = User->getOperand(0).getNode();
20546
20547 // Check to see if multiply is with the same operand of our "add".
20548 //
20549 // ConstNode = CONST
20550 // User = ConstNode * A <-- visiting User. OtherOp is A.
20551 // ...
20552 // AddNode = (A + c1) <-- MulVar is A.
20553 // = AddNode * ConstNode <-- current visiting instruction.
20554 //
20555 // If we make this transformation, we will have a common
20556 // multiply (ConstNode * A) that we can save.
20557 if (OtherOp == MulVar)
20558 return true;
20559
20560 // Now check to see if a future expansion will give us a common
20561 // multiply.
20562 //
20563 // ConstNode = CONST
20564 // AddNode = (A + c1)
20565 // ... = AddNode * ConstNode <-- current visiting instruction.
20566 // ...
20567 // OtherOp = (A + c2)
20568 // User = OtherOp * ConstNode <-- visiting User.
20569 //
20570 // If we make this transformation, we will have a common
20571 // multiply (CONST * A) after we also do the same transformation
20572 // to the other multiply (the User above).
20573 if (OtherOp->getOpcode() == ISD::ADD &&
20574 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20575 OtherOp->getOperand(0).getNode() == MulVar)
20576 return true;
20577 }
20578 }
20579
20580 // Didn't find a case where this would be profitable.
20581 return false;
20582}
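// A concrete instance of the check above (hypothetical values): given
//   N1 = (A + 10) * 5
//   N2 = (A + 20) * 5
// rewriting both as (A * 5) + 50 and (A * 5) + 100 exposes a common (A * 5)
// multiply, so the fold is reported as profitable while visiting either
// multiply.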
20583
20584SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20585 unsigned NumStores) {
20586 SmallVector<SDValue, 8> Chains;
20587 SmallPtrSet<const SDNode *, 8> Visited;
20588 SDLoc StoreDL(StoreNodes[0].MemNode);
20589
20590 for (unsigned i = 0; i < NumStores; ++i) {
20591 Visited.insert(StoreNodes[i].MemNode);
20592 }
20593
20594 // Don't include nodes that are children or repeated nodes.
20595 for (unsigned i = 0; i < NumStores; ++i) {
20596 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20597 Chains.push_back(StoreNodes[i].MemNode->getChain());
20598 }
20599
20600 assert(!Chains.empty() && "Chain should have generated a chain");
20601 return DAG.getTokenFactor(StoreDL, Chains);
20602}
20603
20604bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20605 const Value *UnderlyingObj = nullptr;
20606 for (const auto &MemOp : StoreNodes) {
20607 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20608 // A pseudo value like a stack frame has its own frame index and size; we
20609 // should not use the first store's frame index for other frames.
20610 if (MMO->getPseudoValue())
20611 return false;
20612
20613 if (!MMO->getValue())
20614 return false;
20615
20616 const Value *Obj = getUnderlyingObject(MMO->getValue());
20617
20618 if (UnderlyingObj && UnderlyingObj != Obj)
20619 return false;
20620
20621 if (!UnderlyingObj)
20622 UnderlyingObj = Obj;
20623 }
20624
20625 return true;
20626}
20627
20628bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20629 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20630 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20631 // Make sure we have something to merge.
20632 if (NumStores < 2)
20633 return false;
20634
20635 assert((!UseTrunc || !UseVector) &&
20636 "This optimization cannot emit a vector truncating store");
20637
20638 // The latest Node in the DAG.
20639 SDLoc DL(StoreNodes[0].MemNode);
20640
20641 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20642 unsigned SizeInBits = NumStores * ElementSizeBits;
20643 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20644
20645 std::optional<MachineMemOperand::Flags> Flags;
20646 AAMDNodes AAInfo;
20647 for (unsigned I = 0; I != NumStores; ++I) {
20648 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20649 if (!Flags) {
20650 Flags = St->getMemOperand()->getFlags();
20651 AAInfo = St->getAAInfo();
20652 continue;
20653 }
20654 // Skip merging if there's an inconsistent flag.
20655 if (Flags != St->getMemOperand()->getFlags())
20656 return false;
20657 // Concatenate AA metadata.
20658 AAInfo = AAInfo.concat(St->getAAInfo());
20659 }
20660
20661 EVT StoreTy;
20662 if (UseVector) {
20663 unsigned Elts = NumStores * NumMemElts;
20664 // Get the type for the merged vector store.
20665 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20666 } else
20667 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20668
20669 SDValue StoredVal;
20670 if (UseVector) {
20671 if (IsConstantSrc) {
20672 SmallVector<SDValue, 8> BuildVector;
20673 for (unsigned I = 0; I != NumStores; ++I) {
20674 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20675 SDValue Val = St->getValue();
20676 // If constant is of the wrong type, convert it now. This comes up
20677 // when one of our stores was truncating.
20678 if (MemVT != Val.getValueType()) {
20679 Val = peekThroughBitcasts(Val);
20680 // Deal with constants of wrong size.
20681 if (ElementSizeBits != Val.getValueSizeInBits()) {
20682 auto *C = dyn_cast<ConstantSDNode>(Val);
20683 if (!C)
20684 // Not clear how to truncate FP values.
20685 // TODO: Handle truncation of build_vector constants
20686 return false;
20687
20688 EVT IntMemVT =
20689 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20690 Val = DAG.getConstant(C->getAPIntValue()
20691 .zextOrTrunc(Val.getValueSizeInBits())
20692 .zextOrTrunc(ElementSizeBits),
20693 SDLoc(C), IntMemVT);
20694 }
20695 // Make sure the correctly sized value is bitcast to the correct type.
20696 Val = DAG.getBitcast(MemVT, Val);
20697 }
20698 BuildVector.push_back(Val);
20699 }
20700 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20701 : ISD::BUILD_VECTOR,
20702 DL, StoreTy, BuildVector);
20703 } else {
20704 SmallVector<SDValue, 8> Ops;
20705 for (unsigned i = 0; i < NumStores; ++i) {
20706 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20707 SDValue Val = peekThroughBitcasts(St->getValue());
20708 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20709 // type MemVT. If the underlying value is not the correct
20710 // type, but it is an extraction of an appropriate vector we
20711 // can recast Val to be of the correct type. This may require
20712 // converting between EXTRACT_VECTOR_ELT and
20713 // EXTRACT_SUBVECTOR.
20714 if ((MemVT != Val.getValueType()) &&
20715 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20716 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20717 EVT MemVTScalarTy = MemVT.getScalarType();
20718 // We may need to add a bitcast here to get types to line up.
20719 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20720 Val = DAG.getBitcast(MemVT, Val);
20721 } else if (MemVT.isVector() &&
20722 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20723 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20724 } else {
20725 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20726 : ISD::EXTRACT_VECTOR_ELT;
20727 SDValue Vec = Val.getOperand(0);
20728 SDValue Idx = Val.getOperand(1);
20729 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20730 }
20731 }
20732 Ops.push_back(Val);
20733 }
20734
20735 // Build the extracted vector elements back into a vector.
20736 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20737 : ISD::BUILD_VECTOR,
20738 DL, StoreTy, Ops);
20739 }
20740 } else {
20741 // We should always use a vector store when merging extracted vector
20742 // elements, so this path implies a store of constants.
20743 assert(IsConstantSrc && "Merged vector elements should use vector store");
20744
20745 APInt StoreInt(SizeInBits, 0);
20746
20747 // Construct a single integer constant which is made of the smaller
20748 // constant inputs.
20749 bool IsLE = DAG.getDataLayout().isLittleEndian();
20750 for (unsigned i = 0; i < NumStores; ++i) {
20751 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20752 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20753
20754 SDValue Val = St->getValue();
20755 Val = peekThroughBitcasts(Val);
20756 StoreInt <<= ElementSizeBits;
20757 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20758 StoreInt |= C->getAPIntValue()
20759 .zextOrTrunc(ElementSizeBits)
20760 .zextOrTrunc(SizeInBits);
20761 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20762 StoreInt |= C->getValueAPF()
20763 .bitcastToAPInt()
20764 .zextOrTrunc(ElementSizeBits)
20765 .zextOrTrunc(SizeInBits);
20766 // If fp truncation is necessary, give up for now.
20767 if (MemVT.getSizeInBits() != ElementSizeBits)
20768 return false;
20769 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20770 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20771 // Not yet handled
20772 return false;
20773 } else {
20774 llvm_unreachable("Invalid constant element type");
20775 }
20776 }
20777
20778 // Create the new Load and Store operations.
20779 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20780 }
20781
20782 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20783 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20784 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20785
20786 // Make sure we use a trunc store if it's necessary to be legal.
20787 // When generating the new widened store, if the first store's pointer info
20788 // cannot be reused, discard the pointer info except for the address space,
20789 // because the widened store can no longer be represented by the original
20790 // pointer info, which is for the narrow memory object.
20791 SDValue NewStore;
20792 if (!UseTrunc) {
20793 NewStore = DAG.getStore(
20794 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20795 CanReusePtrInfo
20796 ? FirstInChain->getPointerInfo()
20797 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20798 FirstInChain->getAlign(), *Flags, AAInfo);
20799 } else { // Must be realized as a trunc store
20800 EVT LegalizedStoredValTy =
20801 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20802 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20803 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20804 SDValue ExtendedStoreVal =
20805 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20806 LegalizedStoredValTy);
20807 NewStore = DAG.getTruncStore(
20808 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20809 CanReusePtrInfo
20810 ? FirstInChain->getPointerInfo()
20811 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20812 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20813 AAInfo);
20814 }
20815
20816 // Replace all merged stores with the new store.
20817 for (unsigned i = 0; i < NumStores; ++i)
20818 CombineTo(StoreNodes[i].MemNode, NewStore);
20819
20820 AddToWorklist(NewChain.getNode());
20821 return true;
20822}
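// Worked example of the constant-merging path above (hypothetical,
// little-endian, assuming the merged access is legal and fast): four adjacent
// i8 stores of 0x11, 0x22, 0x33, 0x44 at offsets 0..3 become a single i32
// store of 0x44332211, which writes the same four bytes.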
20823
20824SDNode *
20825DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20826 SmallVectorImpl<MemOpLink> &StoreNodes) {
20827 // This holds the base pointer, index, and the offset in bytes from the base
20828 // pointer. We must have a base and an offset. Do not handle stores to undef
20829 // base pointers.
20830 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20831 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20832 return nullptr;
20833
20834 SDValue Val = peekThroughBitcasts(St->getValue());
20835 StoreSource StoreSrc = getStoreSource(Val);
20836 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20837
20838 // Match on loadbaseptr if relevant.
20839 EVT MemVT = St->getMemoryVT();
20840 BaseIndexOffset LBasePtr;
20841 EVT LoadVT;
20842 if (StoreSrc == StoreSource::Load) {
20843 auto *Ld = cast<LoadSDNode>(Val);
20844 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20845 LoadVT = Ld->getMemoryVT();
20846 // Load and store should be the same type.
20847 if (MemVT != LoadVT)
20848 return nullptr;
20849 // Loads must only have one use.
20850 if (!Ld->hasNUsesOfValue(1, 0))
20851 return nullptr;
20852 // The memory operands must not be volatile/indexed/atomic.
20853 // TODO: May be able to relax for unordered atomics (see D66309)
20854 if (!Ld->isSimple() || Ld->isIndexed())
20855 return nullptr;
20856 }
20857 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20858 int64_t &Offset) -> bool {
20859 // The memory operands must not be volatile/indexed/atomic.
20860 // TODO: May be able to relax for unordered atomics (see D66309)
20861 if (!Other->isSimple() || Other->isIndexed())
20862 return false;
20863 // Don't mix temporal stores with non-temporal stores.
20864 if (St->isNonTemporal() != Other->isNonTemporal())
20865 return false;
20866 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20867 return false;
20868 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20869 // Allow merging constants of different types as integers.
20870 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20871 : Other->getMemoryVT() != MemVT;
20872 switch (StoreSrc) {
20873 case StoreSource::Load: {
20874 if (NoTypeMatch)
20875 return false;
20876 // The Load's Base Ptr must also match.
20877 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20878 if (!OtherLd)
20879 return false;
20880 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20881 if (LoadVT != OtherLd->getMemoryVT())
20882 return false;
20883 // Loads must only have one use.
20884 if (!OtherLd->hasNUsesOfValue(1, 0))
20885 return false;
20886 // The memory operands must not be volatile/indexed/atomic.
20887 // TODO: May be able to relax for unordered atomics (see D66309)
20888 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20889 return false;
20890 // Don't mix temporal loads with non-temporal loads.
20891 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20892 return false;
20893 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20894 *OtherLd))
20895 return false;
20896 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20897 return false;
20898 break;
20899 }
20900 case StoreSource::Constant:
20901 if (NoTypeMatch)
20902 return false;
20903 if (getStoreSource(OtherBC) != StoreSource::Constant)
20904 return false;
20905 break;
20906 case StoreSource::Extract:
20907 // Do not merge truncated stores here.
20908 if (Other->isTruncatingStore())
20909 return false;
20910 if (!MemVT.bitsEq(OtherBC.getValueType()))
20911 return false;
20912 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20913 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20914 return false;
20915 break;
20916 default:
20917 llvm_unreachable("Unhandled store source for merging");
20918 }
20920 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20921 };
20922
20923 // We are looking for a root node which is an ancestor to all mergeable
20924 // stores. We search up through a load, to our root and then down
20925 // through all children. For instance we will find Store{1,2,3} if
20926 // St is Store1, Store2, or Store3 where the root is not a load,
20927 // which is always true for nonvolatile ops. TODO: Expand
20928 // the search to find all valid candidates through multiple layers of loads.
20929 //
20930 // Root
20931 // |-------|-------|
20932 // Load Load Store3
20933 // | |
20934 // Store1 Store2
20935 //
20936 // FIXME: We should be able to climb and
20937 // descend TokenFactors to find candidates as well.
20938
20939 SDNode *RootNode = St->getChain().getNode();
20940 // Bail out if we already analyzed this root node and found nothing.
20941 if (ChainsWithoutMergeableStores.contains(RootNode))
20942 return nullptr;
20943
20944 // Check if the pair of StoreNode and RootNode has already bailed out many
20945 // times, i.e. more than the limit, in the dependence check.
20946 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20947 SDNode *RootNode) -> bool {
20948 auto RootCount = StoreRootCountMap.find(StoreNode);
20949 return RootCount != StoreRootCountMap.end() &&
20950 RootCount->second.first == RootNode &&
20951 RootCount->second.second > StoreMergeDependenceLimit;
20952 };
20953
20954 auto TryToAddCandidate = [&](SDUse &Use) {
20955 // This must be a chain use.
20956 if (Use.getOperandNo() != 0)
20957 return;
20958 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
20959 BaseIndexOffset Ptr;
20960 int64_t PtrDiff;
20961 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20962 !OverLimitInDependenceCheck(OtherStore, RootNode))
20963 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20964 }
20965 };
20966
20967 unsigned NumNodesExplored = 0;
20968 const unsigned MaxSearchNodes = 1024;
20969 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20970 RootNode = Ldn->getChain().getNode();
20971 // Bail out if we already analyzed this root node and found nothing.
20972 if (ChainsWithoutMergeableStores.contains(RootNode))
20973 return nullptr;
20974 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20975 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20976 SDNode *User = I->getUser();
20977 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
20978 for (SDUse &U2 : User->uses())
20979 TryToAddCandidate(U2);
20980 }
20981 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20982 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
20983 TryToAddCandidate(*I);
20984 }
20985 }
20986 } else {
20987 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20988 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20989 TryToAddCandidate(*I);
20990 }
20991
20992 return RootNode;
20993}
20994
20995// We need to check that merging these stores does not cause a loop in the
20996// DAG. Any store candidate may depend on another candidate indirectly through
20997// its operands. Check in parallel by searching up from operands of candidates.
20998bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20999 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21000 SDNode *RootNode) {
21001 // FIXME: We should be able to truncate a full search of
21002 // predecessors by doing a BFS and keeping tabs on the originating
21003 // stores from which worklist nodes come, in a similar way to
21004 // TokenFactor simplification.
21005
21006 SmallPtrSet<const SDNode *, 32> Visited;
21007 SmallVector<const SDNode *, 8> Worklist;
21008
21009 // RootNode is a predecessor to all candidates so we need not search
21010 // past it. Add RootNode (peeking through TokenFactors). Do not count
21011 // these towards size check.
21012
21013 Worklist.push_back(RootNode);
21014 while (!Worklist.empty()) {
21015 auto N = Worklist.pop_back_val();
21016 if (!Visited.insert(N).second)
21017 continue; // Already present in Visited.
21018 if (N->getOpcode() == ISD::TokenFactor) {
21019 for (SDValue Op : N->ops())
21020 Worklist.push_back(Op.getNode());
21021 }
21022 }
21023
21024 // Don't count pruning nodes towards max.
21025 unsigned int Max = 1024 + Visited.size();
21026 // Search Ops of store candidates.
21027 for (unsigned i = 0; i < NumStores; ++i) {
21028 SDNode *N = StoreNodes[i].MemNode;
21029 // Of the 4 Store Operands:
21030 // * Chain (Op 0) -> We have already considered these
21031 // in candidate selection, but only by following the
21032 // chain dependencies. We could still have a chain
21033 // dependency to a load, that has a non-chain dep to
21034 // another load, that depends on a store, etc. So it is
21035 // possible to have dependencies that consist of a mix
21036 // of chain and non-chain deps, and we need to include
21037 // chain operands in the analysis here.
21038 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21039 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21040 // but aren't necessarily from the same base node, so
21041 // cycles are possible (e.g. via indexed store).
21042 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21043 // non-indexed stores). Not constant on all targets (e.g. ARM)
21044 // and so can participate in a cycle.
21045 for (const SDValue &Op : N->op_values())
21046 Worklist.push_back(Op.getNode());
21047 }
21048 // Search through DAG. We can stop early if we find a store node.
21049 for (unsigned i = 0; i < NumStores; ++i)
21050 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21051 Max)) {
21052 // If the search bails out, record the StoreNode and RootNode in the
21053 // StoreRootCountMap. If we have seen the pair more times than a limit,
21054 // we won't add the StoreNode into the StoreNodes set again.
21055 if (Visited.size() >= Max) {
21056 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21057 if (RootCount.first == RootNode)
21058 RootCount.second++;
21059 else
21060 RootCount = {RootNode, 1};
21061 }
21062 return false;
21063 }
21064 return true;
21065}
21066
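// Returns the length of the run of consecutive stores found at the front of
// StoreNodes (after pruning), or 0 if no run of at least two exists.
// Worked example (not from a real DAG): with ElementSizeBytes == 4 and
// sorted offsets {0, 4, 8, 20}, the offsets {0, 4, 8} form a run, so this
// returns 3 and leaves the store at offset 20 for a later iteration.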
21067unsigned
21068DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21069 int64_t ElementSizeBytes) const {
21070 while (true) {
21071 // Find a store past the width of the first store.
21072 size_t StartIdx = 0;
21073 while ((StartIdx + 1 < StoreNodes.size()) &&
21074 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21075 StoreNodes[StartIdx + 1].OffsetFromBase)
21076 ++StartIdx;
21077
21078 // Bail if we don't have enough candidates to merge.
21079 if (StartIdx + 1 >= StoreNodes.size())
21080 return 0;
21081
21082 // Trim stores that overlapped with the first store.
21083 if (StartIdx)
21084 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21085
21086 // Scan the memory operations on the chain and find the first
21087 // non-consecutive store memory address.
21088 unsigned NumConsecutiveStores = 1;
21089 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21090 // Check that the addresses are consecutive starting from the second
21091 // element in the list of stores.
21092 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21093 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21094 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21095 break;
21096 NumConsecutiveStores = i + 1;
21097 }
21098 if (NumConsecutiveStores > 1)
21099 return NumConsecutiveStores;
21100
21101 // There are no consecutive stores at the start of the list.
21102 // Remove the first store and try again.
21103 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21104 }
21105}
21106
21107bool DAGCombiner::tryStoreMergeOfConstants(
21108 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21109 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21110 LLVMContext &Context = *DAG.getContext();
21111 const DataLayout &DL = DAG.getDataLayout();
21112 int64_t ElementSizeBytes = MemVT.getStoreSize();
21113 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21114 bool MadeChange = false;
21115
21116 // Store the constants into memory as one consecutive store.
21117 while (NumConsecutiveStores >= 2) {
21118 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21119 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21120 Align FirstStoreAlign = FirstInChain->getAlign();
21121 unsigned LastLegalType = 1;
21122 unsigned LastLegalVectorType = 1;
21123 bool LastIntegerTrunc = false;
21124 bool NonZero = false;
21125 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21126 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21127 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21128 SDValue StoredVal = ST->getValue();
21129 bool IsElementZero = false;
21130 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21131 IsElementZero = C->isZero();
21132 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21133 IsElementZero = C->getConstantFPValue()->isNullValue();
21134 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21135 IsElementZero = true;
21136 if (IsElementZero) {
21137 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21138 FirstZeroAfterNonZero = i;
21139 }
21140 NonZero |= !IsElementZero;
21141
21142 // Find a legal type for the constant store.
21143 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21144 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21145 unsigned IsFast = 0;
21146
21147 // Break early when size is too large to be legal.
21148 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21149 break;
21150
21151 if (TLI.isTypeLegal(StoreTy) &&
21152 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21153 DAG.getMachineFunction()) &&
21154 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21155 *FirstInChain->getMemOperand(), &IsFast) &&
21156 IsFast) {
21157 LastIntegerTrunc = false;
21158 LastLegalType = i + 1;
21159 // Or check whether a truncstore is legal.
21160 } else if (TLI.getTypeAction(Context, StoreTy) ==
21161 TargetLowering::TypePromoteInteger) {
21162 EVT LegalizedStoredValTy =
21163 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21164 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21165 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21166 DAG.getMachineFunction()) &&
21167 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21168 *FirstInChain->getMemOperand(), &IsFast) &&
21169 IsFast) {
21170 LastIntegerTrunc = true;
21171 LastLegalType = i + 1;
21172 }
21173 }
21174
21175 // We only use vectors if the target allows it and the function is not
21176 // marked with the noimplicitfloat attribute.
21177 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21178 AllowVectors) {
21179 // Find a legal type for the vector store.
21180 unsigned Elts = (i + 1) * NumMemElts;
21181 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21182 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21183 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21184 TLI.allowsMemoryAccess(Context, DL, Ty,
21185 *FirstInChain->getMemOperand(), &IsFast) &&
21186 IsFast)
21187 LastLegalVectorType = i + 1;
21188 }
21189 }
21190
21191 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21192 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21193 bool UseTrunc = LastIntegerTrunc && !UseVector;
21194
21195 // Check if we found a legal integer type that creates a meaningful
21196 // merge.
21197 if (NumElem < 2) {
21198 // We know that candidate stores are in order and of correct
21199 // shape. While there is no mergeable sequence from the
21200 // beginning, one may start later in the sequence. The only
21201 // reason a merge of size N could have failed where another of
21202 // the same size would not have, is if the alignment has
21203 // improved or we've dropped a non-zero value. Drop as many
21204 // candidates as we can here.
21205 unsigned NumSkip = 1;
21206 while ((NumSkip < NumConsecutiveStores) &&
21207 (NumSkip < FirstZeroAfterNonZero) &&
21208 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21209 NumSkip++;
21210
21211 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21212 NumConsecutiveStores -= NumSkip;
21213 continue;
21214 }
21215
21216 // Check that we can merge these candidates without causing a cycle.
21217 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21218 RootNode)) {
21219 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21220 NumConsecutiveStores -= NumElem;
21221 continue;
21222 }
21223
21224 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21225 /*IsConstantSrc*/ true,
21226 UseVector, UseTrunc);
21227
21228 // Remove merged stores for next iteration.
21229 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21230 NumConsecutiveStores -= NumElem;
21231 }
21232 return MadeChange;
21233}
21234
21235bool DAGCombiner::tryStoreMergeOfExtracts(
21236 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21237 EVT MemVT, SDNode *RootNode) {
21238 LLVMContext &Context = *DAG.getContext();
21239 const DataLayout &DL = DAG.getDataLayout();
21240 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21241 bool MadeChange = false;
21242
21243 // Loop on Consecutive Stores on success.
21244 while (NumConsecutiveStores >= 2) {
21245 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21246 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21247 Align FirstStoreAlign = FirstInChain->getAlign();
21248 unsigned NumStoresToMerge = 1;
21249 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21250 // Find a legal type for the vector store.
21251 unsigned Elts = (i + 1) * NumMemElts;
21252 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21253 unsigned IsFast = 0;
21254
21255 // Break early when size is too large to be legal.
21256 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21257 break;
21258
21259 if (TLI.isTypeLegal(Ty) &&
21260 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21261 TLI.allowsMemoryAccess(Context, DL, Ty,
21262 *FirstInChain->getMemOperand(), &IsFast) &&
21263 IsFast)
21264 NumStoresToMerge = i + 1;
21265 }
21266
21267 // Check if we found a legal vector type that creates a meaningful
21268 // merge.
21269 if (NumStoresToMerge < 2) {
21270 // We know that candidate stores are in order and of correct
21271 // shape. While there is no mergeable sequence from the
21272 // beginning, one may start later in the sequence. The only
21273 // reason a merge of size N could have failed where another of
21274 // the same size would not have, is if the alignment has
21275 // improved. Drop as many candidates as we can here.
21276 unsigned NumSkip = 1;
21277 while ((NumSkip < NumConsecutiveStores) &&
21278 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21279 NumSkip++;
21280
21281 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21282 NumConsecutiveStores -= NumSkip;
21283 continue;
21284 }
21285
21286 // Check that we can merge these candidates without causing a cycle.
21287 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
21288 RootNode)) {
21289 StoreNodes.erase(StoreNodes.begin(),
21290 StoreNodes.begin() + NumStoresToMerge);
21291 NumConsecutiveStores -= NumStoresToMerge;
21292 continue;
21293 }
21294
21295 MadeChange |= mergeStoresOfConstantsOrVecElts(
21296 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
21297 /*UseVector*/ true, /*UseTrunc*/ false);
21298
21299 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
21300 NumConsecutiveStores -= NumStoresToMerge;
21301 }
21302 return MadeChange;
21303}
21304
21305bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
21306 unsigned NumConsecutiveStores, EVT MemVT,
21307 SDNode *RootNode, bool AllowVectors,
21308 bool IsNonTemporalStore,
21309 bool IsNonTemporalLoad) {
21310 LLVMContext &Context = *DAG.getContext();
21311 const DataLayout &DL = DAG.getDataLayout();
21312 int64_t ElementSizeBytes = MemVT.getStoreSize();
21313 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21314 bool MadeChange = false;
21315
21316 // Look for load nodes which are used by the stored values.
21317 SmallVector<MemOpLink, 8> LoadNodes;
21318
21319 // Find acceptable loads. Loads need to have the same chain (token factor),
21320 // must not be zext, volatile, or indexed, and they must be consecutive.
21321 BaseIndexOffset LdBasePtr;
21322
21323 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21324 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21325 SDValue Val = peekThroughBitcasts(St->getValue());
21326 LoadSDNode *Ld = cast<LoadSDNode>(Val);
21327
21328 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
21329 // If this is not the first ptr that we check.
21330 int64_t LdOffset = 0;
21331 if (LdBasePtr.getBase().getNode()) {
21332 // The base ptr must be the same.
21333 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
21334 break;
21335 } else {
21336 // Check that all other base pointers are the same as this one.
21337 LdBasePtr = LdPtr;
21338 }
21339
21340 // We found a potential memory operand to merge.
21341 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
21342 }
21343
21344 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
21345 Align RequiredAlignment;
21346 bool NeedRotate = false;
21347 if (LoadNodes.size() == 2) {
21348 // If we have load/store pair instructions and we only have two values,
21349 // don't bother merging.
21350 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
21351 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
21352 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
21353 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
21354 break;
21355 }
21356 // If the loads are reversed, see if we can rotate the halves into place.
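// Illustrative sketch (assuming i32 elements and a legal i64 rotate): for
// the pair { store (load p+4) -> q, store (load p) -> q+4 } the two halves
// are swapped, so instead of two load/store pairs we can emit roughly
//   t = load i64, p ; store i64 (rotl t, 32), q
// which is what the code below builds when the rotate is available.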
21357 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
21358 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
21359 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
21360 if (Offset0 - Offset1 == ElementSizeBytes &&
21361 (hasOperation(ISD::ROTL, PairVT) ||
21362 hasOperation(ISD::ROTR, PairVT))) {
21363 std::swap(LoadNodes[0], LoadNodes[1]);
21364 NeedRotate = true;
21365 }
21366 }
21367 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21368 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21369 Align FirstStoreAlign = FirstInChain->getAlign();
21370 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
21371
21372 // Scan the memory operations on the chain and find the first
21373 // non-consecutive load memory address. These variables hold the index in
21374 // the store node array.
21375
21376 unsigned LastConsecutiveLoad = 1;
21377
21378 // This variable refers to the size and not index in the array.
21379 unsigned LastLegalVectorType = 1;
21380 unsigned LastLegalIntegerType = 1;
21381 bool isDereferenceable = true;
21382 bool DoIntegerTruncate = false;
21383 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
21384 SDValue LoadChain = FirstLoad->getChain();
21385 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
21386 // All loads must share the same chain.
21387 if (LoadNodes[i].MemNode->getChain() != LoadChain)
21388 break;
21389
21390 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
21391 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21392 break;
21393 LastConsecutiveLoad = i;
21394
21395 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
21396 isDereferenceable = false;
21397
21398 // Find a legal type for the vector store.
21399 unsigned Elts = (i + 1) * NumMemElts;
21400 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21401
21402 // Break early when size is too large to be legal.
21403 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21404 break;
21405
21406 unsigned IsFastSt = 0;
21407 unsigned IsFastLd = 0;
21408 // Don't try vector types if we need a rotate. We may still fail the
21409 // legality checks for the integer type, but we can't handle the rotate
21410 // case with vectors.
21411 // FIXME: We could use a shuffle in place of the rotate.
21412 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
21413 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21414 DAG.getMachineFunction()) &&
21415 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21416 *FirstInChain->getMemOperand(), &IsFastSt) &&
21417 IsFastSt &&
21418 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21419 *FirstLoad->getMemOperand(), &IsFastLd) &&
21420 IsFastLd) {
21421 LastLegalVectorType = i + 1;
21422 }
21423
21424 // Find a legal type for the integer store.
21425 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21426 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21427 if (TLI.isTypeLegal(StoreTy) &&
21428 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21429 DAG.getMachineFunction()) &&
21430 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21431 *FirstInChain->getMemOperand(), &IsFastSt) &&
21432 IsFastSt &&
21433 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21434 *FirstLoad->getMemOperand(), &IsFastLd) &&
21435 IsFastLd) {
21436 LastLegalIntegerType = i + 1;
21437 DoIntegerTruncate = false;
21438 // Or check whether a truncstore and extload is legal.
21439 } else if (TLI.getTypeAction(Context, StoreTy) ==
21440 TargetLowering::TypePromoteInteger) {
21441 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
21442 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21443 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21444 DAG.getMachineFunction()) &&
21445 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21446 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
21447 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
21448 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21449 *FirstInChain->getMemOperand(), &IsFastSt) &&
21450 IsFastSt &&
21451 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21452 *FirstLoad->getMemOperand(), &IsFastLd) &&
21453 IsFastLd) {
21454 LastLegalIntegerType = i + 1;
21455 DoIntegerTruncate = true;
21456 }
21457 }
21458 }
21459
21460 // Only use vector types if the vector type is larger than the integer
21461 // type. If they are the same, use integers.
21462 bool UseVectorTy =
21463 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
21464 unsigned LastLegalType =
21465 std::max(LastLegalVectorType, LastLegalIntegerType);
21466
21467 // We add +1 here because the LastXXX variables refer to location while
21468 // the NumElem refers to array/index size.
21469 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
21470 NumElem = std::min(LastLegalType, NumElem);
21471 Align FirstLoadAlign = FirstLoad->getAlign();
21472
21473 if (NumElem < 2) {
21474 // We know that candidate stores are in order and of correct
21475 // shape. While there is no mergeable sequence from the
21476 // beginning, one may start later in the sequence. The only
21477 // reason a merge of size N could have failed where another of
21478 // the same size would not have is if the alignment of either
21479 // the load or store has improved. Drop as many candidates as we
21480 // can here.
21481 unsigned NumSkip = 1;
21482 while ((NumSkip < LoadNodes.size()) &&
21483 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21484 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21485 NumSkip++;
21486 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21487 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21488 NumConsecutiveStores -= NumSkip;
21489 continue;
21490 }
21491
21492 // Check that we can merge these candidates without causing a cycle.
21493 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21494 RootNode)) {
21495 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21496 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21497 NumConsecutiveStores -= NumElem;
21498 continue;
21499 }
21500
21501 // Find if it is better to use vectors or integers to load and store
21502 // to memory.
21503 EVT JointMemOpVT;
21504 if (UseVectorTy) {
21505 // Find a legal type for the vector store.
21506 unsigned Elts = NumElem * NumMemElts;
21507 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21508 } else {
21509 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21510 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21511 }
21512
21513 SDLoc LoadDL(LoadNodes[0].MemNode);
21514 SDLoc StoreDL(StoreNodes[0].MemNode);
21515
21516 // The merged loads are required to have the same incoming chain, so
21517 // using the first's chain is acceptable.
21518
21519 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21520 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21521 AddToWorklist(NewStoreChain.getNode());
21522
21523 MachineMemOperand::Flags LdMMOFlags =
21524 isDereferenceable ? MachineMemOperand::MODereferenceable
21525 : MachineMemOperand::MONone;
21526 if (IsNonTemporalLoad)
21527 LdMMOFlags |= MachineMemOperand::MONonTemporal;
21528
21529 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21530
21531 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21532 ? MachineMemOperand::MONonTemporal
21533 : MachineMemOperand::MONone;
21534
21535 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21536
21537 SDValue NewLoad, NewStore;
21538 if (UseVectorTy || !DoIntegerTruncate) {
21539 NewLoad = DAG.getLoad(
21540 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21541 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21542 SDValue StoreOp = NewLoad;
21543 if (NeedRotate) {
21544 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21545 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21546 "Unexpected type for rotate-able load pair");
21547 SDValue RotAmt =
21548 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21549 // Target can convert to the identical ROTR if it does not have ROTL.
21550 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21551 }
21552 NewStore = DAG.getStore(
21553 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21554 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21555 : MachinePointerInfo(FirstStoreAS),
21556 FirstStoreAlign, StMMOFlags);
21557 } else { // This must be the truncstore/extload case
21558 EVT ExtendedTy =
21559 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21560 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21561 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21562 FirstLoad->getPointerInfo(), JointMemOpVT,
21563 FirstLoadAlign, LdMMOFlags);
21564 NewStore = DAG.getTruncStore(
21565 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21566 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21567 : MachinePointerInfo(FirstStoreAS),
21568 JointMemOpVT, FirstInChain->getAlign(),
21569 FirstInChain->getMemOperand()->getFlags());
21570 }
21571
21572 // Transfer chain users from old loads to the new load.
21573 for (unsigned i = 0; i < NumElem; ++i) {
21574 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21575 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21576 SDValue(NewLoad.getNode(), 1));
21577 }
21578
21579 // Replace all stores with the new store. Recursively remove corresponding
21580 // values if they are no longer used.
21581 for (unsigned i = 0; i < NumElem; ++i) {
21582 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21583 CombineTo(StoreNodes[i].MemNode, NewStore);
21584 if (Val->use_empty())
21585 recursivelyDeleteUnusedNodes(Val.getNode());
21586 }
21587
21588 MadeChange = true;
21589 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21590 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21591 NumConsecutiveStores -= NumElem;
21592 }
21593 return MadeChange;
21594}
21595
21596bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21597 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21598 return false;
21599
21600 // TODO: Extend this function to merge stores of scalable vectors.
21601 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21602 // store since we know <vscale x 16 x i8> is exactly twice as large as
21603 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21604 EVT MemVT = St->getMemoryVT();
21605 if (MemVT.isScalableVT())
21606 return false;
21607 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21608 return false;
21609
21610 // This function cannot currently deal with non-byte-sized memory sizes.
21611 int64_t ElementSizeBytes = MemVT.getStoreSize();
21612 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21613 return false;
21614
21615 // Do not bother looking at stored values that are not constants, loads, or
21616 // extracted vector elements.
21617 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21618 const StoreSource StoreSrc = getStoreSource(StoredVal);
21619 if (StoreSrc == StoreSource::Unknown)
21620 return false;
21621
21622 SmallVector<MemOpLink, 8> StoreNodes;
21623 // Find potential store merge candidates by searching through chain sub-DAG
21624 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21625
21626 // Check if there is anything to merge.
21627 if (StoreNodes.size() < 2)
21628 return false;
21629
21630 // Sort the memory operands according to their distance from the
21631 // base pointer.
21632 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21633 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21634 });
21635
21636 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21637 Attribute::NoImplicitFloat);
21638 bool IsNonTemporalStore = St->isNonTemporal();
21639 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21640 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21641
21642 // Store Merge attempts to merge the lowest stores. This generally
21643 // works out as if successful, as the remaining stores are checked
21644 // after the first collection of stores is merged. However, in the
21645 // case that a non-mergeable store is found first, e.g., {p[-2],
21646 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21647 // mergeable cases. To prevent this, we prune such stores from the
21648 // front of StoreNodes here.
21649 bool MadeChange = false;
21650 while (StoreNodes.size() > 1) {
21651 unsigned NumConsecutiveStores =
21652 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21653 // There are no more stores in the list to examine.
21654 if (NumConsecutiveStores == 0)
21655 return MadeChange;
21656
21657 // We have at least 2 consecutive stores. Try to merge them.
21658 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21659 switch (StoreSrc) {
21660 case StoreSource::Constant:
21661 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21662 MemVT, RootNode, AllowVectors);
21663 break;
21664
21665 case StoreSource::Extract:
21666 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21667 MemVT, RootNode);
21668 break;
21669
21670 case StoreSource::Load:
21671 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21672 MemVT, RootNode, AllowVectors,
21673 IsNonTemporalStore, IsNonTemporalLoad);
21674 break;
21675
21676 default:
21677 llvm_unreachable("Unhandled store source type");
21678 }
21679 }
21680
21681 // Remember if we failed to optimize, to save compile time.
21682 if (!MadeChange)
21683 ChainsWithoutMergeableStores.insert(RootNode);
21684
21685 return MadeChange;
21686}
21687
21688SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21689 SDLoc SL(ST);
21690 SDValue ReplStore;
21691
21692 // Replace the chain to avoid dependency.
21693 if (ST->isTruncatingStore()) {
21694 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21695 ST->getBasePtr(), ST->getMemoryVT(),
21696 ST->getMemOperand());
21697 } else {
21698 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21699 ST->getMemOperand());
21700 }
21701
21702 // Create token to keep both nodes around.
21703 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21704 MVT::Other, ST->getChain(), ReplStore);
21705
21706 // Make sure the new and old chains are cleaned up.
21707 AddToWorklist(Token.getNode());
21708
21709 // Don't add users to work list.
21710 return CombineTo(ST, Token, false);
21711}
21712
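// Replace a store of an FP immediate with a store of its integer bit
// pattern when that is no more expensive, e.g. 'store f32 1.0, p' becomes
// 'store i32 0x3f800000, p'; an f64 store may also be split into two i32
// stores when no legal i64 store exists.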
21713SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21714 SDValue Value = ST->getValue();
21715 if (Value.getOpcode() == ISD::TargetConstantFP)
21716 return SDValue();
21717
21718 if (!ISD::isNormalStore(ST))
21719 return SDValue();
21720
21721 SDLoc DL(ST);
21722
21723 SDValue Chain = ST->getChain();
21724 SDValue Ptr = ST->getBasePtr();
21725
21726 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21727
21728 // NOTE: If the original store is volatile, this transform must not increase
21729 // the number of stores. For example, on x86-32 an f64 can be stored in one
21730 // processor operation but an i64 (which is not legal) requires two. So the
21731 // transform should not be done in this case.
21732
21733 SDValue Tmp;
21734 switch (CFP->getSimpleValueType(0).SimpleTy) {
21735 default:
21736 llvm_unreachable("Unknown FP type");
21737 case MVT::f16: // We don't do this for these yet.
21738 case MVT::bf16:
21739 case MVT::f80:
21740 case MVT::f128:
21741 case MVT::ppcf128:
21742 return SDValue();
21743 case MVT::f32:
21744 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21745 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21746 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21747 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21748 MVT::i32);
21749 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21750 }
21751
21752 return SDValue();
21753 case MVT::f64:
21754 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21755 ST->isSimple()) ||
21756 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21757 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21758 getZExtValue(), SDLoc(CFP), MVT::i64);
21759 return DAG.getStore(Chain, DL, Tmp,
21760 Ptr, ST->getMemOperand());
21761 }
21762
21763 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21764 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21765 // Many FP stores are not made apparent until after legalize, e.g. for
21766 // argument passing. Since this is so common, custom legalize the
21767 // 64-bit integer store into two 32-bit stores.
21768 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21769 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21770 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21771 if (DAG.getDataLayout().isBigEndian())
21772 std::swap(Lo, Hi);
21773
21774 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21775 AAMDNodes AAInfo = ST->getAAInfo();
21776
21777 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21778 ST->getOriginalAlign(), MMOFlags, AAInfo);
21779 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21780 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21781 ST->getPointerInfo().getWithOffset(4),
21782 ST->getOriginalAlign(), MMOFlags, AAInfo);
21783 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21784 St0, St1);
21785 }
21786
21787 return SDValue();
21788 }
21789}
21790
21791// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21792//
21793// If a store of a load with an element inserted into it has no other
21794// uses in between the chain, then we can consider the vector store
21795// dead and replace it with just the single scalar element store.
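// For instance (a sketch, not from a real test): with a <4 x i32> value and
// a constant index of 2, 'store (insert_vector_elt (load p), x, 2), p'
// becomes 'store i32 x, p+8', since element 2 lives 8 bytes past the
// vector's base address.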
21796SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21797 SDLoc DL(ST);
21798 SDValue Value = ST->getValue();
21799 SDValue Ptr = ST->getBasePtr();
21800 SDValue Chain = ST->getChain();
21801 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21802 return SDValue();
21803
21804 SDValue Elt = Value.getOperand(1);
21805 SDValue Idx = Value.getOperand(2);
21806
21807 // If the element isn't byte sized or is implicitly truncated then we can't
21808 // compute an offset.
21809 EVT EltVT = Elt.getValueType();
21810 if (!EltVT.isByteSized() ||
21811 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21812 return SDValue();
21813
21814 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21815 if (!Ld || Ld->getBasePtr() != Ptr ||
21816 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21817 !ISD::isNormalStore(ST) ||
21818 Ld->getAddressSpace() != ST->getAddressSpace() ||
21819 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21820 return SDValue();
21821
21822 unsigned IsFast;
21823 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21824 Elt.getValueType(), ST->getAddressSpace(),
21825 ST->getAlign(), ST->getMemOperand()->getFlags(),
21826 &IsFast) ||
21827 !IsFast)
21828 return SDValue();
21829
21830 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21831
21832 // If the offset is a known constant then try to recover the pointer
21833 // info
21834 SDValue NewPtr;
21835 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21836 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21837 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21838 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21839 } else {
21840 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21841 }
21842
21843 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21844 ST->getMemOperand()->getFlags());
21845}
21846
21847SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21848 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21849 SDValue Val = ST->getVal();
21850 EVT VT = Val.getValueType();
21851 EVT MemVT = ST->getMemoryVT();
21852
21853 if (MemVT.bitsLT(VT)) { // Is truncating store
21854 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21855 MemVT.getScalarSizeInBits());
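// E.g. an i32 value stored through an i16 atomic store only demands its low
// 16 bits, so the producer of Val may be simplified accordingly.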
21856 // See if we can simplify the operation with SimplifyDemandedBits, which
21857 // only works if the value has a single use.
21858 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21859 return SDValue(N, 0);
21860 }
21861
21862 return SDValue();
21863}
21864
21865SDValue DAGCombiner::visitSTORE(SDNode *N) {
21866 StoreSDNode *ST = cast<StoreSDNode>(N);
21867 SDValue Chain = ST->getChain();
21868 SDValue Value = ST->getValue();
21869 SDValue Ptr = ST->getBasePtr();
21870
21871 // If this is a store of a bit convert, store the input value if the
21872 // resultant store does not need a higher alignment than the original.
21873 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21874 ST->isUnindexed()) {
21875 EVT SVT = Value.getOperand(0).getValueType();
21876 // If the store is volatile, we only want to change the store type if the
21877 // resulting store is legal. Otherwise we might increase the number of
21878 // memory accesses. We don't care if the original type was legal or not
21879 // as we assume software couldn't rely on the number of accesses of an
21880 // illegal type.
21881 // TODO: May be able to relax for unordered atomics (see D66309)
21882 if (((!LegalOperations && ST->isSimple()) ||
21883 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21884 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21885 DAG, *ST->getMemOperand())) {
21886 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21887 ST->getMemOperand());
21888 }
21889 }
21890
21891 // Turn 'store undef, Ptr' -> nothing.
21892 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21893 return Chain;
21894
21895 // Try to infer better alignment information than the store already has.
21896 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21897 !ST->isAtomic()) {
21898 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21899 if (*Alignment > ST->getAlign() &&
21900 isAligned(*Alignment, ST->getSrcValueOffset())) {
21901 SDValue NewStore =
21902 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21903 ST->getMemoryVT(), *Alignment,
21904 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21905 // NewStore will always be N as we are only refining the alignment
21906 assert(NewStore.getNode() == N);
21907 (void)NewStore;
21908 }
21909 }
21910 }
21911
21912 // Try transforming a pair of floating point load / store ops to integer
21913 // load / store ops.
21914 if (SDValue NewST = TransformFPLoadStorePair(N))
21915 return NewST;
21916
21917 // Try transforming several stores into STORE (BSWAP).
21918 if (SDValue Store = mergeTruncStores(ST))
21919 return Store;
21920
21921 if (ST->isUnindexed()) {
21922 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21923 // adjacent stores.
21924 if (findBetterNeighborChains(ST)) {
21925 // replaceStoreChain uses CombineTo, which handled all of the worklist
21926 // manipulation. Return the original node to not do anything else.
21927 return SDValue(ST, 0);
21928 }
21929 Chain = ST->getChain();
21930 }
21931
21932 // FIXME: is there such a thing as a truncating indexed store?
21933 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21934 Value.getValueType().isInteger() &&
21935 (!isa<ConstantSDNode>(Value) ||
21936 !cast<ConstantSDNode>(Value)->isOpaque())) {
21937 // Convert a truncating store of an extension into a standard store.
21938 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21939 Value.getOpcode() == ISD::SIGN_EXTEND ||
21940 Value.getOpcode() == ISD::ANY_EXTEND) &&
21941 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21942 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21943 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21944 ST->getMemOperand());
21945
21946 APInt TruncDemandedBits =
21947 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21948 ST->getMemoryVT().getScalarSizeInBits());
21949
21950 // See if we can simplify the operation with SimplifyDemandedBits, which
21951 // only works if the value has a single use.
21952 AddToWorklist(Value.getNode());
21953 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21954 // Re-visit the store if anything changed and the store hasn't been merged
21955 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21956 // node back to the worklist if necessary, but we also need to re-visit
21957 // the Store node itself.
21958 if (N->getOpcode() != ISD::DELETED_NODE)
21959 AddToWorklist(N);
21960 return SDValue(N, 0);
21961 }
21962
21963 // Otherwise, see if we can simplify the input to this truncstore with
21964 // knowledge that only the low bits are being used. For example:
21965 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21966 if (SDValue Shorter =
21967 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21968 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21969 ST->getMemOperand());
21970
21971 // If we're storing a truncated constant, see if we can simplify it.
21972 // TODO: Move this to targetShrinkDemandedConstant?
21973 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21974 if (!Cst->isOpaque()) {
21975 const APInt &CValue = Cst->getAPIntValue();
21976 APInt NewVal = CValue & TruncDemandedBits;
21977 if (NewVal != CValue) {
21978 SDValue Shorter =
21979 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21980 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21981 ST->getMemoryVT(), ST->getMemOperand());
21982 }
21983 }
21984 }
21985
21986 // If this is a load followed by a store to the same location, then the store
21987 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21988 // TODO: Add big-endian truncate support with test coverage.
21989 // TODO: Can relax for unordered atomics (see D66309)
21990 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21991 ? peekThroughTruncates(Value)
21992 : Value;
21993 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21994 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21995 ST->isUnindexed() && ST->isSimple() &&
21996 Ld->getAddressSpace() == ST->getAddressSpace() &&
21997 // There can't be any side effects between the load and store, such as
21998 // a call or store.
21999 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22000 // The store is dead, remove it.
22001 return Chain;
22002 }
22003 }
22004
22005 // Try scalarizing vector stores of loads where we only change one element
22006 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22007 return NewST;
22008
22009 // TODO: Can relax for unordered atomics (see D66309)
22010 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22011 if (ST->isUnindexed() && ST->isSimple() &&
22012 ST1->isUnindexed() && ST1->isSimple()) {
22013 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22014 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22015 ST->getAddressSpace() == ST1->getAddressSpace()) {
22016 // If this is a store followed by a store with the same value to the
22017 // same location, then the store is dead/noop.
22018 return Chain;
22019 }
22020
22021 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22022 !ST1->getBasePtr().isUndef() &&
22023 ST->getAddressSpace() == ST1->getAddressSpace()) {
22024 // If, of the two stores, the smaller one has a scalable vector
22025 // type and the bigger one has a fixed type, then we cannot allow
22026 // removal of the scalable store, because we don't know its final
22027 // size in the end.
22028 if (ST->getMemoryVT().isScalableVector() ||
22029 ST1->getMemoryVT().isScalableVector()) {
22030 if (ST1->getBasePtr() == Ptr &&
22031 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22032 ST->getMemoryVT().getStoreSize())) {
22033 CombineTo(ST1, ST1->getChain());
22034 return SDValue(N, 0);
22035 }
22036 } else {
22037 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22038 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22039 // If the preceding store writes to a subset of the current store's
22040 // location and no other node is chained to that store, we can
22041 // effectively drop the store. Do not remove stores to undef as they
22042 // may be used as data sinks.
22043 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22044 ChainBase,
22045 ST1->getMemoryVT().getFixedSizeInBits())) {
22046 CombineTo(ST1, ST1->getChain());
22047 return SDValue(N, 0);
22048 }
22049 }
22050 }
22051 }
22052 }
22053
22054 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22055 // truncating store. We can do this even if this is already a truncstore.
22056 if ((Value.getOpcode() == ISD::FP_ROUND ||
22057 Value.getOpcode() == ISD::TRUNCATE) &&
22058 Value->hasOneUse() && ST->isUnindexed() &&
22059 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22060 ST->getMemoryVT(), LegalOperations)) {
22061 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22062 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22063 }
22064
22065 // Always perform this optimization before types are legal. If the target
22066 // prefers, also try this after legalization to catch stores that were created
22067 // by intrinsics or other nodes.
22068 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22069 while (true) {
22070 // There can be multiple store sequences on the same chain.
22071 // Keep trying to merge store sequences until we are unable to do so
22072 // or until we merge the last store on the chain.
22073 bool Changed = mergeConsecutiveStores(ST);
22074 if (!Changed) break;
22075 // Return N as merge only uses CombineTo and no worklist clean
22076 // up is necessary.
22077 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22078 return SDValue(N, 0);
22079 }
22080 }
22081
22082 // Try transforming N to an indexed store.
22083 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22084 return SDValue(N, 0);
22085
22086 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22087 //
22088 // Make sure to do this only after attempting to merge stores in order to
22089 // avoid changing the types of some subset of stores due to visit order,
22090 // preventing their merging.
22091 if (isa<ConstantFPSDNode>(ST->getValue())) {
22092 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22093 return NewSt;
22094 }
22095
22096 if (SDValue NewSt = splitMergedValStore(ST))
22097 return NewSt;
22098
22099 return ReduceLoadOpStoreWidth(N);
22100}
22101
22102SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22103 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22104 if (!LifetimeEnd->hasOffset())
22105 return SDValue();
22106
22107 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
22108 LifetimeEnd->getOffset(), false);
22109
22110 // We walk up the chains to find stores.
22111 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22112 while (!Chains.empty()) {
22113 SDValue Chain = Chains.pop_back_val();
22114 if (!Chain.hasOneUse())
22115 continue;
22116 switch (Chain.getOpcode()) {
22117 case ISD::TokenFactor:
22118 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22119 Chains.push_back(Chain.getOperand(--Nops));
22120 break;
22121 case ISD::LIFETIME_START:
22122 case ISD::LIFETIME_END:
22123 // We can forward past any lifetime start/end that can be proven not to
22124 // alias the node.
22125 if (!mayAlias(Chain.getNode(), N))
22126 Chains.push_back(Chain.getOperand(0));
22127 break;
22128 case ISD::STORE: {
22129 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22130 // TODO: Can relax for unordered atomics (see D66309)
22131 if (!ST->isSimple() || ST->isIndexed())
22132 continue;
22133 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22134 // The bounds of a scalable store are not known until runtime, so this
22135 // store cannot be elided.
22136 if (StoreSize.isScalable())
22137 continue;
22138 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22139 // If we store purely within object bounds just before its lifetime ends,
22140 // we can remove the store.
22141 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
22142 StoreSize.getFixedValue() * 8)) {
22143 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22144 dbgs() << "\nwithin LIFETIME_END of : ";
22145 LifetimeEndBase.dump(); dbgs() << "\n");
22146 CombineTo(ST, ST->getChain());
22147 return SDValue(N, 0);
22148 }
22149 }
22150 }
22151 }
22152 return SDValue();
22153}
22154
22155/// For the instruction sequence of store below, F and I values
22156/// are bundled together as an i64 value before being stored into memory.
22157 /// Sometimes it is more efficient to generate separate stores for F and I,
22158/// which can remove the bitwise instructions or sink them to colder places.
22159///
22160/// (store (or (zext (bitcast F to i32) to i64),
22161/// (shl (zext I to i64), 32)), addr) -->
22162/// (store F, addr) and (store I, addr+4)
22163///
22164 /// Similarly, splitting for other merged stores can also be beneficial, like:
22165/// For pair of {i32, i32}, i64 store --> two i32 stores.
22166/// For pair of {i32, i16}, i64 store --> two i32 stores.
22167/// For pair of {i16, i16}, i32 store --> two i16 stores.
22168/// For pair of {i16, i8}, i32 store --> two i16 stores.
22169/// For pair of {i8, i8}, i16 store --> two i8 stores.
22170///
22171/// We allow each target to determine specifically which kind of splitting is
22172/// supported.
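/// (The target expresses this preference through
/// TLI.isMultiStoresCheaperThanBitsMerge, which is queried below.)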
22173///
22174/// The store patterns are commonly seen from the simple code snippet below
22175 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22176/// void goo(const std::pair<int, float> &);
22177/// hoo() {
22178/// ...
22179/// goo(std::make_pair(tmp, ftmp));
22180/// ...
22181/// }
22182///
22183SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22184 if (OptLevel == CodeGenOptLevel::None)
22185 return SDValue();
22186
22187 // Can't change the number of memory accesses for a volatile store or break
22188 // atomicity for an atomic one.
22189 if (!ST->isSimple())
22190 return SDValue();
22191
22192 SDValue Val = ST->getValue();
22193 SDLoc DL(ST);
22194
22195 // Match OR operand.
22196 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22197 return SDValue();
22198
22199 // Match SHL operand and get Lower and Higher parts of Val.
22200 SDValue Op1 = Val.getOperand(0);
22201 SDValue Op2 = Val.getOperand(1);
22202 SDValue Lo, Hi;
22203 if (Op1.getOpcode() != ISD::SHL) {
22204 std::swap(Op1, Op2);
22205 if (Op1.getOpcode() != ISD::SHL)
22206 return SDValue();
22207 }
22208 Lo = Op2;
22209 Hi = Op1.getOperand(0);
22210 if (!Op1.hasOneUse())
22211 return SDValue();
22212
22213 // Match shift amount to HalfValBitSize.
22214 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22215 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22216 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22217 return SDValue();
22218
22221 // Lo and Hi are zero-extended from integer types whose size is at most
22222 // HalfValBitSize.
22221 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
22222 !Lo.getOperand(0).getValueType().isScalarInteger() ||
22223 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
22224 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
22225 !Hi.getOperand(0).getValueType().isScalarInteger() ||
22226 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
22227 return SDValue();
22228
22229 // Use the EVT of low and high parts before bitcast as the input
22230 // of target query.
22231 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
22232 ? Lo.getOperand(0).getValueType()
22233 : Lo.getValueType();
22234 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
22235 ? Hi.getOperand(0).getValueType()
22236 : Hi.getValueType();
22237 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
22238 return SDValue();
22239
22240 // Start to split store.
22241 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22242 AAMDNodes AAInfo = ST->getAAInfo();
22243
22244 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
22245 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
22246 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
22247 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
22248
22249 SDValue Chain = ST->getChain();
22250 SDValue Ptr = ST->getBasePtr();
22251 // Lower value store.
22252 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22253 ST->getOriginalAlign(), MMOFlags, AAInfo);
22254 Ptr =
22255 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
22256 // Higher value store.
22257 SDValue St1 = DAG.getStore(
22258 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
22259 ST->getOriginalAlign(), MMOFlags, AAInfo);
22260 return St1;
22261}
22262
22263// Merge an insertion into an existing shuffle:
22264// (insert_vector_elt (vector_shuffle X, Y, Mask),
22265 // (extract_vector_elt X, N), InsIndex)
22266// --> (vector_shuffle X, Y, NewMask)
22267// and variations where shuffle operands may be CONCAT_VECTORS.
22268 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
22269 SmallVectorImpl<int> &NewMask, SDValue Elt,
22270 unsigned InsIndex) {
22271 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22272 !isa<ConstantSDNode>(Elt.getOperand(1)))
22273 return false;
22274
22275 // Vec's operand 0 is using indices from 0 to N-1 and
22276 // operand 1 from N to 2N - 1, where N is the number of
22277 // elements in the vectors.
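// For example, with 4-element X and Y, a mask entry of 2 selects X[2] while
// a mask entry of 5 selects Y[1].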
22278 SDValue InsertVal0 = Elt.getOperand(0);
22279 int ElementOffset = -1;
22280
22281 // We explore the inputs of the shuffle in order to see if we find the
22282 // source of the extract_vector_elt. If so, we can use it to modify the
22283 // shuffle rather than perform an insert_vector_elt.
22284 SmallVector<std::pair<int, SDValue>> ArgWorkList;
22285 ArgWorkList.emplace_back(Mask.size(), Y);
22286 ArgWorkList.emplace_back(0, X);
22287
22288 while (!ArgWorkList.empty()) {
22289 int ArgOffset;
22290 SDValue ArgVal;
22291 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
22292
22293 if (ArgVal == InsertVal0) {
22294 ElementOffset = ArgOffset;
22295 break;
22296 }
22297
22298 // Peek through concat_vector.
22299 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
22300 int CurrentArgOffset =
22301 ArgOffset + ArgVal.getValueType().getVectorNumElements();
22302 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
22303 for (SDValue Op : reverse(ArgVal->ops())) {
22304 CurrentArgOffset -= Step;
22305 ArgWorkList.emplace_back(CurrentArgOffset, Op);
22306 }
22307
22308 // Make sure we went through all the elements and did not screw up index
22309 // computation.
22310 assert(CurrentArgOffset == ArgOffset);
22311 }
22312 }
22313
22314 // If we failed to find a match, see if we can replace an UNDEF shuffle
22315 // operand.
22316 if (ElementOffset == -1) {
22317 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
22318 return false;
22319 ElementOffset = Mask.size();
22320 Y = InsertVal0;
22321 }
22322
22323 NewMask.assign(Mask.begin(), Mask.end());
22324 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
22325 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
22326 "NewMask[InsIndex] is out of bound");
22327 return true;
22328}
22329
22330// Merge an insertion into an existing shuffle:
22331// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
22332// InsIndex)
22333// --> (vector_shuffle X, Y) and variations where shuffle operands may be
22334// CONCAT_VECTORS.
22335SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
22336 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22337 "Expected extract_vector_elt");
22338 SDValue InsertVal = N->getOperand(1);
22339 SDValue Vec = N->getOperand(0);
22340
22341 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
22342 if (!SVN || !Vec.hasOneUse())
22343 return SDValue();
22344
22345 ArrayRef<int> Mask = SVN->getMask();
22346 SDValue X = Vec.getOperand(0);
22347 SDValue Y = Vec.getOperand(1);
22348
22349 SmallVector<int, 16> NewMask(Mask);
22350 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
22351 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
22352 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
22353 if (LegalShuffle)
22354 return LegalShuffle;
22355 }
22356
22357 return SDValue();
22358}
22359
22360// Convert a disguised subvector insertion into a shuffle:
22361// insert_vector_elt V, (bitcast X from vector type), IdxC -->
22362// bitcast(shuffle (bitcast V), (extended X), Mask)
22363// Note: We do not use an insert_subvector node because that requires a
22364// legal subvector type.
22365SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
22366 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
22367 "Expected extract_vector_elt");
22368 SDValue InsertVal = N->getOperand(1);
22369
22370 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
22371 !InsertVal.getOperand(0).getValueType().isVector())
22372 return SDValue();
22373
22374 SDValue SubVec = InsertVal.getOperand(0);
22375 SDValue DestVec = N->getOperand(0);
22376 EVT SubVecVT = SubVec.getValueType();
22377 EVT VT = DestVec.getValueType();
22378 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
22379 // If the source only has a single vector element, the cost of creating and
22380 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
22381 if (NumSrcElts == 1)
22382 return SDValue();
22383 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
22384 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
22385
22386 // Step 1: Create a shuffle mask that implements this insert operation. The
22387 // vector that we are inserting into will be operand 0 of the shuffle, so
22388 // those elements are just 'i'. The inserted subvector is in the first
22389 // positions of operand 1 of the shuffle. Example:
22390 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
22391 SmallVector<int, 16> Mask(NumMaskVals);
22392 for (unsigned i = 0; i != NumMaskVals; ++i) {
22393 if (i / NumSrcElts == InsIndex)
22394 Mask[i] = (i % NumSrcElts) + NumMaskVals;
22395 else
22396 Mask[i] = i;
22397 }
22398
22399 // Bail out if the target can not handle the shuffle we want to create.
22400 EVT SubVecEltVT = SubVecVT.getVectorElementType();
22401 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
22402 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
22403 return SDValue();
22404
22405 // Step 2: Create a wide vector from the inserted source vector by appending
22406 // undefined elements. This is the same size as our destination vector.
22407 SDLoc DL(N);
22408 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
22409 ConcatOps[0] = SubVec;
22410 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
22411
22412 // Step 3: Shuffle in the padded subvector.
22413 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
22414 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
22415 AddToWorklist(PaddedSubV.getNode());
22416 AddToWorklist(DestVecBC.getNode());
22417 AddToWorklist(Shuf.getNode());
22418 return DAG.getBitcast(VT, Shuf);
22419}
22420
22421// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
22422 // possible and the new load will be quick. We use more loads but fewer
22423 // shuffles and inserts.
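// Concrete illustration (hypothetical types): for a <4 x i32> result with
// InsIndex == 0, mask <u,0,1,2>, and a scalar load located one element below
// the vector load, the combine emits a single <4 x i32> load starting at the
// scalar's address, i.e. the loaded window shifts down by one element.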
22424SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
22425 EVT VT = N->getValueType(0);
22426
22427 // InsIndex is expected to be the first or last lane.
22428 if (!VT.isFixedLengthVector() ||
22429 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
22430 return SDValue();
22431
22432 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
22433 // depending on the InsIndex.
22434 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
22435 SDValue Scalar = N->getOperand(1);
22436 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
22437 return InsIndex == P.index() || P.value() < 0 ||
22438 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
22439 (InsIndex == VT.getVectorNumElements() - 1 &&
22440 P.value() == (int)P.index() + 1);
22441 }))
22442 return SDValue();
22443
22444 // We optionally skip over an extend so long as both loads are extended in the
22445 // same way from the same type.
22446 unsigned Extend = 0;
22447 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
22448 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
22449 Scalar.getOpcode() == ISD::ANY_EXTEND) {
22450 Extend = Scalar.getOpcode();
22451 Scalar = Scalar.getOperand(0);
22452 }
22453
22454 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
22455 if (!ScalarLoad)
22456 return SDValue();
22457
22458 SDValue Vec = Shuffle->getOperand(0);
22459 if (Extend) {
22460 if (Vec.getOpcode() != Extend)
22461 return SDValue();
22462 Vec = Vec.getOperand(0);
22463 }
22464 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
22465 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
22466 return SDValue();
22467
22468 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
22469 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
22470 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22471 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
22472 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
22473 return SDValue();
22474
22475 // Check that the offset between the pointers produces a single contiguous
22476 // load.
22477 if (InsIndex == 0) {
22478 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
22479 -1))
22480 return SDValue();
22481 } else {
22482 if (!DAG.areNonVolatileConsecutiveLoads(
22483 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22484 return SDValue();
22485 }
22486
22487 // And that the new unaligned load will be fast.
22488 unsigned IsFast = 0;
22489 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22490 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22491 Vec.getValueType(), VecLoad->getAddressSpace(),
22492 NewAlign, VecLoad->getMemOperand()->getFlags(),
22493 &IsFast) ||
22494 !IsFast)
22495 return SDValue();
22496
22497 // Calculate the new Ptr and create the new load.
22498 SDLoc DL(N);
22499 SDValue Ptr = ScalarLoad->getBasePtr();
22500 if (InsIndex != 0)
22501 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22502 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22503 MachinePointerInfo PtrInfo =
22504 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22505 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22506
22507 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22508 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22509 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22510 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22511 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22512}
22513
22514SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22515 SDValue InVec = N->getOperand(0);
22516 SDValue InVal = N->getOperand(1);
22517 SDValue EltNo = N->getOperand(2);
22518 SDLoc DL(N);
22519
22520 EVT VT = InVec.getValueType();
22521 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22522
22523 // Insert into out-of-bounds element is undefined.
22524 if (IndexC && VT.isFixedLengthVector() &&
22525 IndexC->getZExtValue() >= VT.getVectorNumElements())
22526 return DAG.getUNDEF(VT);
22527
22528 // Remove redundant insertions:
22529 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22530 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22531 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22532 return InVec;
22533
22534 if (!IndexC) {
22535 // If this is a variable insert into an undef vector, it might be better to splat:
22536 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22537 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22538 return DAG.getSplat(VT, DL, InVal);
22539 return SDValue();
22540 }
22541
22542 if (VT.isScalableVector())
22543 return SDValue();
22544
22545 unsigned NumElts = VT.getVectorNumElements();
22546
22547 // We must know which element is being inserted for folds below here.
22548 unsigned Elt = IndexC->getZExtValue();
22549
22550 // Handle <1 x ???> vector insertion special cases.
22551 if (NumElts == 1) {
22552 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22553 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22554 InVal.getOperand(0).getValueType() == VT &&
22555 isNullConstant(InVal.getOperand(1)))
22556 return InVal.getOperand(0);
22557 }
22558
22559 // Canonicalize insert_vector_elt dag nodes.
22560 // Example:
22561 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22562 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22563 //
22564 // Do this only if the child insert_vector node has one use; also
22565 // do this only if indices are both constants and Idx1 < Idx0.
22566 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22567 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22568 unsigned OtherElt = InVec.getConstantOperandVal(2);
22569 if (Elt < OtherElt) {
22570 // Swap nodes.
22571 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22572 InVec.getOperand(0), InVal, EltNo);
22573 AddToWorklist(NewOp.getNode());
22574 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22575 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22576 }
22577 }
22578
22579 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22580 return Shuf;
22581
22582 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22583 return Shuf;
22584
22585 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22586 return Shuf;
22587
22588 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22589 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22590 // vXi1 vector - we don't need to recurse.
22591 if (NumElts == 1)
22592 return DAG.getBuildVector(VT, DL, {InVal});
22593
22594 // If we haven't already collected the element, insert into the op list.
22595 EVT MaxEltVT = InVal.getValueType();
22596 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22597 unsigned Idx) {
22598 if (!Ops[Idx]) {
22599 Ops[Idx] = Elt;
22600 if (VT.isInteger()) {
22601 EVT EltVT = Elt.getValueType();
22602 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22603 }
22604 }
22605 };
22606
22607 // Ensure all the operands are the same value type, fill any missing
22608 // operands with UNDEF and create the BUILD_VECTOR.
22609 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22610 assert(Ops.size() == NumElts && "Unexpected vector size");
22611 for (SDValue &Op : Ops) {
22612 if (Op)
22613 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22614 else
22615 Op = DAG.getUNDEF(MaxEltVT);
22616 }
22617 return DAG.getBuildVector(VT, DL, Ops);
22618 };
22619
22620 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22621 Ops[Elt] = InVal;
22622
22623 // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22624 for (SDValue CurVec = InVec; CurVec;) {
22625 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22626 if (CurVec.isUndef())
22627 return CanonicalizeBuildVector(Ops);
22628
22629 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22630 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22631 for (unsigned I = 0; I != NumElts; ++I)
22632 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22633 return CanonicalizeBuildVector(Ops);
22634 }
22635
22636 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22637 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22638 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22639 return CanonicalizeBuildVector(Ops);
22640 }
22641
22642 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22643 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22644 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22645 if (CurIdx->getAPIntValue().ult(NumElts)) {
22646 unsigned Idx = CurIdx->getZExtValue();
22647 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22648
22649 // Found entire BUILD_VECTOR.
22650 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22651 return CanonicalizeBuildVector(Ops);
22652
22653 CurVec = CurVec->getOperand(0);
22654 continue;
22655 }
22656
22657 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22658 // update the shuffle mask (and second operand if we started with unary
22659 // shuffle) and create a new legal shuffle.
22660 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22661 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22662 SDValue LHS = SVN->getOperand(0);
22663 SDValue RHS = SVN->getOperand(1);
22664 SmallVector<int, 16> Mask(SVN->getMask());
22665 bool Merged = true;
22666 for (auto I : enumerate(Ops)) {
22667 SDValue &Op = I.value();
22668 if (Op) {
22669 SmallVector<int, 16> NewMask;
22670 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22671 Merged = false;
22672 break;
22673 }
22674 Mask = std::move(NewMask);
22675 }
22676 }
22677 if (Merged)
22678 if (SDValue NewShuffle =
22679 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22680 return NewShuffle;
22681 }
22682
22683 // If all insertions are zero value, try to convert to AND mask.
22684 // TODO: Do this for -1 with OR mask?
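// For example, a chain that only inserts zero into lanes 0 and 2 of a
// v4i32 value X becomes (and X, build_vector <0, -1, 0, -1>).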
22685 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22686 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22687 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22688 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22689 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22690 SmallVector<SDValue, 8> Mask(NumElts);
22691 for (unsigned I = 0; I != NumElts; ++I)
22692 Mask[I] = Ops[I] ? Zero : AllOnes;
22693 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22694 DAG.getBuildVector(VT, DL, Mask));
22695 }
22696
22697 // Failed to find a match in the chain - bail.
22698 break;
22699 }
22700
22701 // See if we can fill in the missing constant elements as zeros.
22702 // TODO: Should we do this for any constant?
22703 APInt DemandedZeroElts = APInt::getZero(NumElts);
22704 for (unsigned I = 0; I != NumElts; ++I)
22705 if (!Ops[I])
22706 DemandedZeroElts.setBit(I);
22707
22708 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22709 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22710 : DAG.getConstantFP(0, DL, MaxEltVT);
22711 for (unsigned I = 0; I != NumElts; ++I)
22712 if (!Ops[I])
22713 Ops[I] = Zero;
22714
22715 return CanonicalizeBuildVector(Ops);
22716 }
22717 }
22718
22719 return SDValue();
22720}
22721
22722SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22723 SDValue EltNo,
22724 LoadSDNode *OriginalLoad) {
22725 assert(OriginalLoad->isSimple());
22726
22727 EVT ResultVT = EVE->getValueType(0);
22728 EVT VecEltVT = InVecVT.getVectorElementType();
22729
22730 // If the vector element type is not a multiple of a byte then we are unable
22731 // to correctly compute an address to load only the extracted element as a
22732 // scalar.
22733 if (!VecEltVT.isByteSized())
22734 return SDValue();
22735
22736 ISD::LoadExtType ExtTy =
22737 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
22738 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22739 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22740 return SDValue();
22741
22742 Align Alignment = OriginalLoad->getAlign();
22743 MachinePointerInfo MPI;
22744 SDLoc DL(EVE);
22745 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22746 int Elt = ConstEltNo->getZExtValue();
22747 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22748 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22749 Alignment = commonAlignment(Alignment, PtrOff);
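// E.g. for a v4i32 vector load and Elt == 2, PtrOff is 8: the pointer info
// is offset by 8 bytes and the alignment becomes
// commonAlignment(original alignment, 8).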
22750 } else {
22751 // Discard the pointer info except the address space because the memory
22752 // operand can't represent this new access since the offset is variable.
22753 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22754 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22755 }
22756
22757 unsigned IsFast = 0;
22758 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22759 OriginalLoad->getAddressSpace(), Alignment,
22760 OriginalLoad->getMemOperand()->getFlags(),
22761 &IsFast) ||
22762 !IsFast)
22763 return SDValue();
22764
22765 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22766 InVecVT, EltNo);
22767
22768 // We are replacing a vector load with a scalar load. The new load must have
22769 // identical memory op ordering to the original.
22770 SDValue Load;
22771 if (ResultVT.bitsGT(VecEltVT)) {
22772 // If the result type of vextract is wider than the load, then issue an
22773 // extending load instead.
22774 ISD::LoadExtType ExtType =
22775 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22776 : ISD::EXTLOAD;
22777 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22778 NewPtr, MPI, VecEltVT, Alignment,
22779 OriginalLoad->getMemOperand()->getFlags(),
22780 OriginalLoad->getAAInfo());
22781 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22782 } else {
22783 // The result type is narrower or the same width as the vector element
22784 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22785 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22786 OriginalLoad->getAAInfo());
22787 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22788 if (ResultVT.bitsLT(VecEltVT))
22789 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22790 else
22791 Load = DAG.getBitcast(ResultVT, Load);
22792 }
22793 ++OpsNarrowed;
22794 return Load;
22795}
22796
22797/// Transform a vector binary operation into a scalar binary operation by moving
22798/// the math/logic after an extract element of a vector.
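/// For example, extractelt (add X, <1,2,3,4>), 2 becomes
/// add (extractelt X, 2), 3; the extract of the constant operand is
/// constant-folded away, so only a single scalar operation remains.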
22799 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
22800 const SDLoc &DL, bool LegalTypes) {
22801 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22802 SDValue Vec = ExtElt->getOperand(0);
22803 SDValue Index = ExtElt->getOperand(1);
22804 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22805 unsigned Opc = Vec.getOpcode();
22806 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
22807 Vec->getNumValues() != 1)
22808 return SDValue();
22809
22810 // Targets may want to avoid this to prevent an expensive register transfer.
22811 if (!TLI.shouldScalarizeBinop(Vec))
22812 return SDValue();
22813
22814 EVT ResVT = ExtElt->getValueType(0);
22815 if (Opc == ISD::SETCC &&
22816 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
22817 return SDValue();
22818
22819 // Extracting an element of a vector constant is constant-folded, so this
22820 // transform is just replacing a vector op with a scalar op while moving the
22821 // extract.
22822 SDValue Op0 = Vec.getOperand(0);
22823 SDValue Op1 = Vec.getOperand(1);
22824 APInt SplatVal;
22825 if (!isAnyConstantBuildVector(Op0, true) &&
22826 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
22827 !isAnyConstantBuildVector(Op1, true) &&
22828 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
22829 return SDValue();
22830
22831 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
22832 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
22833 if (Opc == ISD::SETCC) {
22834 EVT OpVT = Op0.getValueType().getVectorElementType();
22835 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
22836 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
22837 SDValue NewVal = DAG.getSetCC(
22838 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
22839 // We may need to sign- or zero-extend the result to match the same
22840 // behaviour as the vector version of SETCC.
22841 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
22842 if (ResVT != MVT::i1 &&
22843 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
22844 VecBoolContents != TLI.getBooleanContents(ResVT)) {
22845 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
22846 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
22847 DAG.getValueType(MVT::i1));
22848 else
22849 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
22850 }
22851 return NewVal;
22852 }
22853 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
22854 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
22855 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
22856}
22857
22858 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22859 // recursively analyse all of its users and try to model them as
22860 // bit sequence extractions. If all of them agree on the new, narrower element
22861 // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22862 // new element type, do so now.
22863 // This is mainly useful to recover from legalization that scalarized
22864 // the vector as wide elements; here we try to rebuild it with narrower elements.
22865//
22866// Some more nodes could be modelled if that helps cover interesting patterns.
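//
// An illustrative little-endian example: if (i64 extract_vector_elt v2i64:V, 0)
// is only used as (trunc ... to i32) and (trunc (srl ..., 32) to i32), and
// those truncs in turn only feed BUILD_VECTORs, then both leaves are 32-bit
// slices at bit positions 0 and 32, and they can be rebuilt as
// extract_vector_elt (bitcast V to v4i32), 0 and ..., 1 respectively.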
22867bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22868 SDNode *N) {
22869 // We perform this optimization post type-legalization because
22870 // the type-legalizer often scalarizes integer-promoted vectors.
22871 // Performing this optimization earlier may cause legalization cycles.
22872 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22873 return false;
22874
22875 // TODO: Add support for big-endian.
22876 if (DAG.getDataLayout().isBigEndian())
22877 return false;
22878
22879 SDValue VecOp = N->getOperand(0);
22880 EVT VecVT = VecOp.getValueType();
22881 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22882
22883 // We must start with a constant extraction index.
22884 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22885 if (!IndexC)
22886 return false;
22887
22888 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22889 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22890
22891 // TODO: deal with the case of implicit anyext of the extraction.
22892 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22893 EVT ScalarVT = N->getValueType(0);
22894 if (VecVT.getScalarType() != ScalarVT)
22895 return false;
22896
22897 // TODO: deal with the cases other than everything being integer-typed.
22898 if (!ScalarVT.isScalarInteger())
22899 return false;
22900
22901 struct Entry {
22902 SDNode *Producer;
22903
22904 // Which bits of VecOp does it contain?
22905 unsigned BitPos;
22906 int NumBits;
22907 // NOTE: the actual width of \p Producer may be wider than NumBits!
22908
22909 Entry(Entry &&) = default;
22910 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22911 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22912
22913 Entry() = delete;
22914 Entry(const Entry &) = delete;
22915 Entry &operator=(const Entry &) = delete;
22916 Entry &operator=(Entry &&) = delete;
22917 };
22918 SmallVector<Entry, 32> Worklist;
22919 SmallVector<Entry, 32> Leafs;
22920
22921 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22922 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22923 /*NumBits=*/VecEltBitWidth);
22924
22925 while (!Worklist.empty()) {
22926 Entry E = Worklist.pop_back_val();
22927 // Does the node not even use any of the VecOp bits?
22928 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22929 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22930 return false; // Let's allow the other combines clean this up first.
22931 // Did we fail to model any of the users of the Producer?
22932 bool ProducerIsLeaf = false;
22933 // Look at each user of this Producer.
22934 for (SDNode *User : E.Producer->users()) {
22935 switch (User->getOpcode()) {
22936 // TODO: support ISD::BITCAST
22937 // TODO: support ISD::ANY_EXTEND
22938 // TODO: support ISD::ZERO_EXTEND
22939 // TODO: support ISD::SIGN_EXTEND
22940 case ISD::TRUNCATE:
22941 // Truncation simply means we keep position, but extract less bits.
22942 Worklist.emplace_back(User, E.BitPos,
22943 /*NumBits=*/User->getValueSizeInBits(0));
22944 break;
22945 // TODO: support ISD::SRA
22946 // TODO: support ISD::SHL
22947 case ISD::SRL:
22948 // We should be shifting the Producer by a constant amount.
22949 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22950 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22951 // Logical right-shift means that we start extraction later,
22952 // but stop it at the same position we did previously.
22953 unsigned ShAmt = ShAmtC->getZExtValue();
22954 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22955 break;
22956 }
22957 [[fallthrough]];
22958 default:
22959 // We can not model this user of the Producer.
22960 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22961 ProducerIsLeaf = true;
22962 // Profitability check: all users that we can not model
22963 // must be ISD::BUILD_VECTOR's.
22964 if (User->getOpcode() != ISD::BUILD_VECTOR)
22965 return false;
22966 break;
22967 }
22968 }
22969 if (ProducerIsLeaf)
22970 Leafs.emplace_back(std::move(E));
22971 }
22972
22973 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22974
22975 // If we are still at the same element granularity, give up.
22976 if (NewVecEltBitWidth == VecEltBitWidth)
22977 return false;
22978
22979 // The vector width must be a multiple of the new element width.
22980 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22981 return false;
22982
22983 // All leafs must agree on the new element width.
22984 // All leafs must not expect any "padding" bits on top of that width.
22985 // All leafs must start extraction from multiple of that width.
22986 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22987 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22988 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22989 E.BitPos % NewVecEltBitWidth == 0;
22990 }))
22991 return false;
22992
22993 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22994 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22995 VecVT.getSizeInBits() / NewVecEltBitWidth);
22996
22997 if (LegalTypes &&
22998 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22999 return false;
23000
23001 if (LegalOperations &&
23002 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23003 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23004 return false;
23005
23006 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23007 for (const Entry &E : Leafs) {
23008 SDLoc DL(E.Producer);
23009 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23010 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23011 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23012 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23013 DAG.getVectorIdxConstant(NewIndex, DL));
23014 CombineTo(E.Producer, V);
23015 }
23016
23017 return true;
23018}
23019
23020SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23021 SDValue VecOp = N->getOperand(0);
23022 SDValue Index = N->getOperand(1);
23023 EVT ScalarVT = N->getValueType(0);
23024 EVT VecVT = VecOp.getValueType();
23025 if (VecOp.isUndef())
23026 return DAG.getUNDEF(ScalarVT);
23027
23028 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23029 //
23030 // This only really matters if the index is non-constant since other combines
23031 // on the constant elements already work.
23032 SDLoc DL(N);
23033 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23034 Index == VecOp.getOperand(2)) {
23035 SDValue Elt = VecOp.getOperand(1);
23036 AddUsersToWorklist(VecOp.getNode());
23037 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23038 }
23039
23040 // (vextract (scalar_to_vector val), 0) -> val
23041 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23042 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23043 if (DAG.isKnownNeverZero(Index))
23044 return DAG.getUNDEF(ScalarVT);
23045
23046 // Check if the result type doesn't match the inserted element type.
23047 // The inserted element and extracted element may have mismatched bitwidth.
23048 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23049 SDValue InOp = VecOp.getOperand(0);
23050 if (InOp.getValueType() != ScalarVT) {
23051 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23052 if (InOp.getValueType().bitsGT(ScalarVT))
23053 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23054 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23055 }
23056 return InOp;
23057 }
23058
23059 // extract_vector_elt of out-of-bounds element -> UNDEF
23060 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23061 if (IndexC && VecVT.isFixedLengthVector() &&
23062 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23063 return DAG.getUNDEF(ScalarVT);
23064
23065 // extract_vector_elt (build_vector x, y), 1 -> y
23066 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23067 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23068 TLI.isTypeLegal(VecVT)) {
23069 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23070 VecVT.isFixedLengthVector()) &&
23071 "BUILD_VECTOR used for scalable vectors");
23072 unsigned IndexVal =
23073 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23074 SDValue Elt = VecOp.getOperand(IndexVal);
23075 EVT InEltVT = Elt.getValueType();
23076
23077 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23078 isNullConstant(Elt)) {
23079 // Sometimes build_vector's scalar input types do not match result type.
23080 if (ScalarVT == InEltVT)
23081 return Elt;
23082
23083 // TODO: It may be useful to truncate if free if the build_vector
23084 // implicitly converts.
23085 }
23086 }
23087
23088 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23089 return BO;
23090
23091 if (VecVT.isScalableVector())
23092 return SDValue();
23093
23094 // All the code from this point onwards assumes fixed width vectors, but it's
23095 // possible that some of the combinations could be made to work for scalable
23096 // vectors too.
23097 unsigned NumElts = VecVT.getVectorNumElements();
23098 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23099
23100 // See if the extracted element is constant, in which case fold it if it's
23101 // a legal fp immediate.
23102 if (IndexC && ScalarVT.isFloatingPoint()) {
23103 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23104 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23105 if (KnownElt.isConstant()) {
23106 APFloat CstFP =
23107 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23108 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23109 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23110 }
23111 }
23112
23113 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23114 // there are regressions on multiple targets without it. We can end up with a
23115 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23116 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23117 VecOp.hasOneUse()) {
23118 // The vector index of the LSBs of the source depends on the endianness.
23119 bool IsLE = DAG.getDataLayout().isLittleEndian();
23120 unsigned ExtractIndex = IndexC->getZExtValue();
23121 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23122 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23123 SDValue BCSrc = VecOp.getOperand(0);
23124 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23125 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23126
23127 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23128 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23129 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23130 BCSrc.getScalarValueSizeInBits() ==
23131 BCSrc.getOperand(0).getScalarValueSizeInBits()) {
23132 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23133 // trunc i64 X to i32
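// If the extract index is not BCTruncElt, the wanted bits are shifted down
// first, e.g. (little-endian):
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v8i16), 2 -->
// trunc (srl X, 32) to i16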
23134 SDValue X = BCSrc.getOperand(0);
23135 EVT XVT = X.getValueType();
23136 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23137 "Extract element and scalar to vector can't change element type "
23138 "from FP to integer.");
23139 unsigned XBitWidth = X.getValueSizeInBits();
23140 unsigned Scale = XBitWidth / VecEltBitWidth;
23141 BCTruncElt = IsLE ? 0 : Scale - 1;
23142
23143 // An extract element return value type can be wider than its vector
23144 // operand element type. In that case, the high bits are undefined, so
23145 // it's possible that we may need to extend rather than truncate.
23146 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23147 assert(XBitWidth % VecEltBitWidth == 0 &&
23148 "Scalar bitwidth must be a multiple of vector element bitwidth");
23149
23150 if (ExtractIndex != BCTruncElt) {
23151 unsigned ShiftIndex =
23152 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23153 X = DAG.getNode(
23154 ISD::SRL, DL, XVT, X,
23155 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23156 }
23157
23158 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23159 }
23160 }
23161 }
23162
23163 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23164 // We only perform this optimization before the op legalization phase because
23165 // we may introduce new vector instructions which are not backed by TD
23166 // patterns. For example on AVX, extracting elements from a wide vector
23167 // without using extract_subvector. However, if we can find an underlying
23168 // scalar value, then we can always use that.
23169 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23170 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23171 // Find the new index to extract from.
23172 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23173
23174 // Extracting an undef index is undef.
23175 if (OrigElt == -1)
23176 return DAG.getUNDEF(ScalarVT);
23177
23178 // Select the right vector half to extract from.
23179 SDValue SVInVec;
23180 if (OrigElt < (int)NumElts) {
23181 SVInVec = VecOp.getOperand(0);
23182 } else {
23183 SVInVec = VecOp.getOperand(1);
23184 OrigElt -= NumElts;
23185 }
23186
23187 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23188 // TODO: Check if shuffle mask is legal?
23189 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
23190 !VecOp.hasOneUse())
23191 return SDValue();
23192
23193 SDValue InOp = SVInVec.getOperand(OrigElt);
23194 if (InOp.getValueType() != ScalarVT) {
23195 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23196 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23197 }
23198
23199 return InOp;
23200 }
23201
23202 // FIXME: We should handle recursing on other vector shuffles and
23203 // scalar_to_vector here as well.
23204
23205 if (!LegalOperations ||
23206 // FIXME: Should really be just isOperationLegalOrCustom.
23207 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23208 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT))
23209 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23210 DAG.getVectorIdxConstant(OrigElt, DL));
23211 }
23212 }
23213
23214 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23215 // simplify it based on the (valid) extraction indices.
23216 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23217 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23218 Use->getOperand(0) == VecOp &&
23219 isa<ConstantSDNode>(Use->getOperand(1));
23220 })) {
23221 APInt DemandedElts = APInt::getZero(NumElts);
23222 for (SDNode *User : VecOp->users()) {
23223 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23224 if (CstElt->getAPIntValue().ult(NumElts))
23225 DemandedElts.setBit(CstElt->getZExtValue());
23226 }
23227 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23228 // We simplified the vector operand of this extract element. If this
23229 // extract is not dead, visit it again so it is folded properly.
23230 if (N->getOpcode() != ISD::DELETED_NODE)
23231 AddToWorklist(N);
23232 return SDValue(N, 0);
23233 }
23234 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23235 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23236 // We simplified the vector operand of this extract element. If this
23237 // extract is not dead, visit it again so it is folded properly.
23238 if (N->getOpcode() != ISD::DELETED_NODE)
23239 AddToWorklist(N);
23240 return SDValue(N, 0);
23241 }
23242 }
23243
23244 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23245 return SDValue(N, 0);
23246
23247 // Everything under here is trying to match an extract of a loaded value.
23248 // If the result of the load has to be truncated, then it's not necessarily
23249 // profitable.
23250 bool BCNumEltsChanged = false;
23251 EVT ExtVT = VecVT.getVectorElementType();
23252 EVT LVT = ExtVT;
23253 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23254 return SDValue();
23255
23256 if (VecOp.getOpcode() == ISD::BITCAST) {
23257 // Don't duplicate a load with other uses.
23258 if (!VecOp.hasOneUse())
23259 return SDValue();
23260
23261 EVT BCVT = VecOp.getOperand(0).getValueType();
23262 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23263 return SDValue();
23264 if (NumElts != BCVT.getVectorNumElements())
23265 BCNumEltsChanged = true;
23266 VecOp = VecOp.getOperand(0);
23267 ExtVT = BCVT.getVectorElementType();
23268 }
23269
23270 // extract (vector load $addr), i --> load $addr + i * size
23271 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
23272 ISD::isNormalLoad(VecOp.getNode()) &&
23273 !Index->hasPredecessor(VecOp.getNode())) {
23274 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
23275 if (VecLoad && VecLoad->isSimple())
23276 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
23277 }
23278
23279 // Perform only after legalization to ensure build_vector / vector_shuffle
23280 // optimizations have already been done.
23281 if (!LegalOperations || !IndexC)
23282 return SDValue();
23283
23284 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
23285 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
23286 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
23287 int Elt = IndexC->getZExtValue();
23288 LoadSDNode *LN0 = nullptr;
23289 if (ISD::isNormalLoad(VecOp.getNode())) {
23290 LN0 = cast<LoadSDNode>(VecOp);
23291 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23292 VecOp.getOperand(0).getValueType() == ExtVT &&
23293 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
23294 // Don't duplicate a load with other uses.
23295 if (!VecOp.hasOneUse())
23296 return SDValue();
23297
23298 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
23299 }
23300 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
23301 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
23302 // =>
23303 // (load $addr+1*size)
23304
23305 // Don't duplicate a load with other uses.
23306 if (!VecOp.hasOneUse())
23307 return SDValue();
23308
23309 // If the bit convert changed the number of elements, it is unsafe
23310 // to examine the mask.
23311 if (BCNumEltsChanged)
23312 return SDValue();
23313
23314 // Select the input vector, guarding against an out-of-range extract index.
23315 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
23316 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
23317
23318 if (VecOp.getOpcode() == ISD::BITCAST) {
23319 // Don't duplicate a load with other uses.
23320 if (!VecOp.hasOneUse())
23321 return SDValue();
23322
23323 VecOp = VecOp.getOperand(0);
23324 }
23325 if (ISD::isNormalLoad(VecOp.getNode())) {
23326 LN0 = cast<LoadSDNode>(VecOp);
23327 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
23328 Index = DAG.getConstant(Elt, DL, Index.getValueType());
23329 }
23330 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
23331 VecVT.getVectorElementType() == ScalarVT &&
23332 (!LegalTypes ||
23333 TLI.isTypeLegal(
23334 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
23335 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
23336 // -> extract_vector_elt a, 0
23337 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
23338 // -> extract_vector_elt a, 1
23339 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
23340 // -> extract_vector_elt b, 0
23341 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
23342 // -> extract_vector_elt b, 1
23343 EVT ConcatVT = VecOp.getOperand(0).getValueType();
23344 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
23345 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
23346 Index.getValueType());
23347
23348 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
23349 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
23350 ConcatVT.getVectorElementType(),
23351 ConcatOp, NewIdx);
23352 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
23353 }
23354
23355 // Make sure we found a non-volatile load and the extractelement is
23356 // the only use.
23357 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
23358 return SDValue();
23359
23360 // If Idx was -1 above, Elt is going to be -1, so just return undef.
23361 if (Elt == -1)
23362 return DAG.getUNDEF(LVT);
23363
23364 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
23365}
23366
23367// Simplify (build_vec (ext )) to (bitcast (build_vec ))
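// For example (little-endian), build_vector (zext i8 %a to i16),
// (zext i8 %b to i16) becomes bitcast (v4i8 build_vector %a, 0, %b, 0) to
// v2i16; if every extend were an any_extend, undef would be used as the
// filler instead of zero.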
23368SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
23369 // We perform this optimization post type-legalization because
23370 // the type-legalizer often scalarizes integer-promoted vectors.
23371 // Performing this optimization before may create bit-casts which
23372 // will be type-legalized to complex code sequences.
23373 // We perform this optimization only before the operation legalizer because we
23374 // may introduce illegal operations.
23375 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23376 return SDValue();
23377
23378 unsigned NumInScalars = N->getNumOperands();
23379 SDLoc DL(N);
23380 EVT VT = N->getValueType(0);
23381
23382 // Check to see if this is a BUILD_VECTOR of a bunch of values
23383 // which come from any_extend or zero_extend nodes. If so, we can create
23384 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
23385 // optimizations. We do not handle sign-extend because we can't fill the sign
23386 // using shuffles.
23387 EVT SourceType = MVT::Other;
23388 bool AllAnyExt = true;
23389
23390 for (unsigned i = 0; i != NumInScalars; ++i) {
23391 SDValue In = N->getOperand(i);
23392 // Ignore undef inputs.
23393 if (In.isUndef()) continue;
23394
23395 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
23396 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
23397
23398 // Abort if the element is not an extension.
23399 if (!ZeroExt && !AnyExt) {
23400 SourceType = MVT::Other;
23401 break;
23402 }
23403
23404 // The input is a ZeroExt or AnyExt. Check the original type.
23405 EVT InTy = In.getOperand(0).getValueType();
23406
23407 // Check that all of the widened source types are the same.
23408 if (SourceType == MVT::Other)
23409 // First time.
23410 SourceType = InTy;
23411 else if (InTy != SourceType) {
23412 // Multiple incoming types. Abort.
23413 SourceType = MVT::Other;
23414 break;
23415 }
23416
23417 // Check if all of the extends are ANY_EXTENDs.
23418 AllAnyExt &= AnyExt;
23419 }
23420
23421 // In order to have valid types, all of the inputs must be extended from the
23422 // same source type and all of the inputs must be any or zero extend.
23423 // Scalar sizes must be a power of two.
23424 EVT OutScalarTy = VT.getScalarType();
23425 bool ValidTypes =
23426 SourceType != MVT::Other &&
23427 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
23428 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
23429
23430 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
23431 // turn into a single shuffle instruction.
23432 if (!ValidTypes)
23433 return SDValue();
23434
23435 // If we already have a splat buildvector, then don't fold it if it means
23436 // introducing zeros.
23437 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
23438 return SDValue();
23439
23440 bool isLE = DAG.getDataLayout().isLittleEndian();
23441 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
23442 assert(ElemRatio > 1 && "Invalid element size ratio");
23443 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
23444 DAG.getConstant(0, DL, SourceType);
23445
23446 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
23447 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
23448
23449 // Populate the new build_vector
23450 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23451 SDValue Cast = N->getOperand(i);
23452 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
23453 Cast.getOpcode() == ISD::ZERO_EXTEND ||
23454 Cast.isUndef()) && "Invalid cast opcode");
23455 SDValue In;
23456 if (Cast.isUndef())
23457 In = DAG.getUNDEF(SourceType);
23458 else
23459 In = Cast->getOperand(0);
23460 unsigned Index = isLE ? (i * ElemRatio) :
23461 (i * ElemRatio + (ElemRatio - 1));
23462
23463 assert(Index < Ops.size() && "Invalid index");
23464 Ops[Index] = In;
23465 }
23466
23467 // The type of the new BUILD_VECTOR node.
23468 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
23469 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
23470 "Invalid vector size");
23471 // Check if the new vector type is legal.
23472 if (!isTypeLegal(VecVT) ||
23473 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
23474 LegalOperations))
23475 return SDValue();
23476
23477 // Make the new BUILD_VECTOR.
23478 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
23479
23480 // The new BUILD_VECTOR node has the potential to be further optimized.
23481 AddToWorklist(BV.getNode());
23482 // Bitcast to the desired type.
23483 return DAG.getBitcast(VT, BV);
23484}
23485
23486// Simplify (build_vec (trunc $1)
23487// (trunc (srl $1 half-width))
23488// (trunc (srl $1 (2 * half-width))))
23489// to (bitcast $1)
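// For example (little-endian), with an i64 source %x and a v4i16 result:
//   build_vector (trunc %x), (trunc (srl %x, 16)),
//                (trunc (srl %x, 32)), (trunc (srl %x, 48))
// is just a reinterpretation of the bits of %x, i.e. (bitcast %x to v4i16).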
23490SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
23491 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23492
23493 EVT VT = N->getValueType(0);
23494
23495 // Don't run this before LegalizeTypes if VT is legal.
23496 // Targets may have other preferences.
23497 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
23498 return SDValue();
23499
23500 // Only for little endian
23501 if (!DAG.getDataLayout().isLittleEndian())
23502 return SDValue();
23503
23504 SDLoc DL(N);
23505 EVT OutScalarTy = VT.getScalarType();
23506 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
23507
23508 // Only for power of two types to be sure that bitcast works well
23509 if (!isPowerOf2_64(ScalarTypeBitsize))
23510 return SDValue();
23511
23512 unsigned NumInScalars = N->getNumOperands();
23513
23514 // Look through bitcasts
23515 auto PeekThroughBitcast = [](SDValue Op) {
23516 if (Op.getOpcode() == ISD::BITCAST)
23517 return Op.getOperand(0);
23518 return Op;
23519 };
23520
23521 // The source value where all the parts are extracted.
23522 SDValue Src;
23523 for (unsigned i = 0; i != NumInScalars; ++i) {
23524 SDValue In = PeekThroughBitcast(N->getOperand(i));
23525 // Ignore undef inputs.
23526 if (In.isUndef()) continue;
23527
23528 if (In.getOpcode() != ISD::TRUNCATE)
23529 return SDValue();
23530
23531 In = PeekThroughBitcast(In.getOperand(0));
23532
23533 if (In.getOpcode() != ISD::SRL) {
23534 // For now only build_vec without shuffling, handle shifts here in the
23535 // future.
23536 if (i != 0)
23537 return SDValue();
23538
23539 Src = In;
23540 } else {
23541 // In is SRL
23542 SDValue part = PeekThroughBitcast(In.getOperand(0));
23543
23544 if (!Src) {
23545 Src = part;
23546 } else if (Src != part) {
23547 // Vector parts do not stem from the same variable
23548 return SDValue();
23549 }
23550
23551 SDValue ShiftAmtVal = In.getOperand(1);
23552 if (!isa<ConstantSDNode>(ShiftAmtVal))
23553 return SDValue();
23554
23555 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23556
23557 // The extracted value is not extracted at the right position
23558 if (ShiftAmt != i * ScalarTypeBitsize)
23559 return SDValue();
23560 }
23561 }
23562
23563 // Only cast if the size is the same
23564 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23565 return SDValue();
23566
23567 return DAG.getBitcast(VT, Src);
23568}
23569
23570SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23571 ArrayRef<int> VectorMask,
23572 SDValue VecIn1, SDValue VecIn2,
23573 unsigned LeftIdx, bool DidSplitVec) {
23574 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23575
23576 EVT VT = N->getValueType(0);
23577 EVT InVT1 = VecIn1.getValueType();
23578 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23579
23580 unsigned NumElems = VT.getVectorNumElements();
23581 unsigned ShuffleNumElems = NumElems;
23582
23583 // If we artificially split a vector in two already, then the offsets in the
23584 // operands will all be based off of VecIn1, even those in VecIn2.
23585 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23586
23587 uint64_t VTSize = VT.getFixedSizeInBits();
23588 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23589 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23590
23591 assert(InVT2Size <= InVT1Size &&
23592 "Inputs must be sorted to be in non-increasing vector size order.");
23593
23594 // We can't generate a shuffle node with mismatched input and output types.
23595 // Try to make the types match the type of the output.
23596 if (InVT1 != VT || InVT2 != VT) {
23597 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23598 // If the output vector length is a multiple of both input lengths,
23599 // we can concatenate them and pad the rest with undefs.
23600 unsigned NumConcats = VTSize / InVT1Size;
23601 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23602 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23603 ConcatOps[0] = VecIn1;
23604 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23605 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23606 VecIn2 = SDValue();
23607 } else if (InVT1Size == VTSize * 2) {
23608 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23609 return SDValue();
23610
23611 if (!VecIn2.getNode()) {
23612 // If we only have one input vector, and it's twice the size of the
23613 // output, split it in two.
23614 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23615 DAG.getVectorIdxConstant(NumElems, DL));
23616 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23617 // Since we now have shorter input vectors, adjust the offset of the
23618 // second vector's start.
23619 Vec2Offset = NumElems;
23620 } else {
23621 assert(InVT2Size <= InVT1Size &&
23622 "Second input is not going to be larger than the first one.");
23623
23624 // VecIn1 is wider than the output, and we have another, possibly
23625 // smaller input. Pad the smaller input with undefs, shuffle at the
23626 // input vector width, and extract the output.
23627 // The shuffle type is different than VT, so check legality again.
23628 if (LegalOperations &&
23629 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23630 return SDValue();
23631
23632 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23633 // lower it back into a BUILD_VECTOR. So if the inserted type is
23634 // illegal, don't even try.
23635 if (InVT1 != InVT2) {
23636 if (!TLI.isTypeLegal(InVT2))
23637 return SDValue();
23638 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23639 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23640 }
23641 ShuffleNumElems = NumElems * 2;
23642 }
23643 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23644 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23645 ConcatOps[0] = VecIn2;
23646 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23647 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23648 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23649 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23650 return SDValue();
23652 // If the dest vector has fewer than two elements, then using a shuffle and
23653 // extracting from larger regs will cost even more.
23653 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23654 return SDValue();
23655 assert(InVT2Size <= InVT1Size &&
23656 "Second input is not going to be larger than the first one.");
23657
23658 // VecIn1 is wider than the output, and we have another, possibly
23659 // smaller input. Pad the smaller input with undefs, shuffle at the
23660 // input vector width, and extract the output.
23661 // The shuffle type is different than VT, so check legality again.
23662 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23663 return SDValue();
23664
23665 if (InVT1 != InVT2) {
23666 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23667 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23668 }
23669 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23670 } else {
23671 // TODO: Support cases where the length mismatch isn't exactly by a
23672 // factor of 2.
23673 // TODO: Move this check upwards, so that if we have bad type
23674 // mismatches, we don't create any DAG nodes.
23675 return SDValue();
23676 }
23677 }
23678
23679 // Initialize mask to undef.
23680 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23681
23682 // Only need to run up to the number of elements actually used, not the
23683 // total number of elements in the shuffle - if we are shuffling a wider
23684 // vector, the high lanes should be set to undef.
23685 for (unsigned i = 0; i != NumElems; ++i) {
23686 if (VectorMask[i] <= 0)
23687 continue;
23688
23689 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23690 if (VectorMask[i] == (int)LeftIdx) {
23691 Mask[i] = ExtIndex;
23692 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23693 Mask[i] = Vec2Offset + ExtIndex;
23694 }
23695 }
23696
23698 // The type of the input vectors may have changed above.
23698 InVT1 = VecIn1.getValueType();
23699
23700 // If we already have a VecIn2, it should have the same type as VecIn1.
23701 // If we don't, get an undef/zero vector of the appropriate type.
23702 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23703 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23704
23705 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23706 if (ShuffleNumElems > NumElems)
23707 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23708
23709 return Shuffle;
23710}
23711
23712 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23713 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23714
23715 // First, determine where the build vector is not undef.
23716 // TODO: We could extend this to handle zero elements as well as undefs.
23717 int NumBVOps = BV->getNumOperands();
23718 int ZextElt = -1;
23719 for (int i = 0; i != NumBVOps; ++i) {
23720 SDValue Op = BV->getOperand(i);
23721 if (Op.isUndef())
23722 continue;
23723 if (ZextElt == -1)
23724 ZextElt = i;
23725 else
23726 return SDValue();
23727 }
23728 // Bail out if there's no non-undef element.
23729 if (ZextElt == -1)
23730 return SDValue();
23731
23732 // The build vector contains some number of undef elements and exactly
23733 // one other element. That other element must be a zero-extended scalar
23734 // extracted from a vector at a constant index to turn this into a shuffle.
23735 // Also, require that the build vector does not implicitly truncate/extend
23736 // its elements.
23737 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23738 EVT VT = BV->getValueType(0);
23739 SDValue Zext = BV->getOperand(ZextElt);
23740 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23741 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23742 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23743 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23744 return SDValue();
23745
23746 // The zero-extend must be a multiple of the source size, and we must be
23747 // building a vector of the same size as the source of the extract element.
23748 SDValue Extract = Zext.getOperand(0);
23749 unsigned DestSize = Zext.getValueSizeInBits();
23750 unsigned SrcSize = Extract.getValueSizeInBits();
23751 if (DestSize % SrcSize != 0 ||
23752 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23753 return SDValue();
23754
23755 // Create a shuffle mask that will combine the extracted element with zeros
23756 // and undefs.
23757 int ZextRatio = DestSize / SrcSize;
23758 int NumMaskElts = NumBVOps * ZextRatio;
23759 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23760 for (int i = 0; i != NumMaskElts; ++i) {
23761 if (i / ZextRatio == ZextElt) {
23762 // The low bits of the (potentially translated) extracted element map to
23763 // the source vector. The high bits map to zero. We will use a zero vector
23764 // as the 2nd source operand of the shuffle, so use the 1st element of
23765 // that vector (mask value is number-of-elements) for the high bits.
23766 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23767 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23768 : NumMaskElts;
23769 }
23770
23771 // Undef elements of the build vector remain undef because we initialize
23772 // the shuffle mask with -1.
23773 }
23774
23775 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23776 // bitcast (shuffle V, ZeroVec, VectorMask)
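// For example, for a v4i32 build vector whose only defined element 1 is
// (zext (extractelt v8i16:V, C) to i32): ZextRatio is 2, so the v8i16 shuffle
// mask is <u,u,C,8,u,u,u,u> (little-endian); lane 2 takes the extracted i16
// and lane 3 takes a zero from ZeroVec.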
23777 SDLoc DL(BV);
23778 EVT VecVT = Extract.getOperand(0).getValueType();
23779 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23780 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23781 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23782 ZeroVec, ShufMask, DAG);
23783 if (!Shuf)
23784 return SDValue();
23785 return DAG.getBitcast(VT, Shuf);
23786}
23787
23788// FIXME: promote to STLExtras.
23789template <typename R, typename T>
23790static auto getFirstIndexOf(R &&Range, const T &Val) {
23791 auto I = find(Range, Val);
23792 if (I == Range.end())
23793 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23794 return std::distance(Range.begin(), I);
23795}
23796
23797// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23798// operations. If the types of the vectors we're extracting from allow it,
23799// turn this into a vector_shuffle node.
23800SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23801 SDLoc DL(N);
23802 EVT VT = N->getValueType(0);
23803
23804 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23805 if (!isTypeLegal(VT))
23806 return SDValue();
23807
23808 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23809 return V;
23810
23811 // May only combine to shuffle after legalize if shuffle is legal.
23812 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23813 return SDValue();
23814
23815 bool UsesZeroVector = false;
23816 unsigned NumElems = N->getNumOperands();
23817
23818 // Record, for each element of the newly built vector, which input vector
23819 // that element comes from. -1 stands for undef, 0 for the zero vector,
23820 // and positive values for the input vectors.
23821 // VectorMask maps each element to its vector number, and VecIn maps vector
23822 // numbers to their initial SDValues.
23823
23824 SmallVector<int, 8> VectorMask(NumElems, -1);
23825 SmallVector<SDValue, 8> VecIn;
23826 VecIn.push_back(SDValue());
23827
23828 // If we have a single extract_element with a constant index, track the index
23829 // value.
23830 unsigned OneConstExtractIndex = ~0u;
23831
23832 // Count the number of extract_vector_elt sources (i.e. operands that are neither constant nor undef)
23833 unsigned NumExtracts = 0;
23834
23835 for (unsigned i = 0; i != NumElems; ++i) {
23836 SDValue Op = N->getOperand(i);
23837
23838 if (Op.isUndef())
23839 continue;
23840
23841 // See if we can use a blend with a zero vector.
23842 // TODO: Should we generalize this to a blend with an arbitrary constant
23843 // vector?
23844 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23845 UsesZeroVector = true;
23846 VectorMask[i] = 0;
23847 continue;
23848 }
23849
23850 // Not an undef or zero. If the input is something other than an
23851 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23852 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
23853 return SDValue();
23854
23855 SDValue ExtractedFromVec = Op.getOperand(0);
23856 if (ExtractedFromVec.getValueType().isScalableVector())
23857 return SDValue();
23858 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
23859 if (!ExtractIdx)
23860 return SDValue();
23861
23862 if (ExtractIdx->getAsAPIntVal().uge(
23863 ExtractedFromVec.getValueType().getVectorNumElements()))
23864 return SDValue();
23865
23866 // All inputs must have the same element type as the output.
23867 if (VT.getVectorElementType() !=
23868 ExtractedFromVec.getValueType().getVectorElementType())
23869 return SDValue();
23870
23871 OneConstExtractIndex = ExtractIdx->getZExtValue();
23872 ++NumExtracts;
23873
23874 // Have we seen this input vector before?
23875 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23876 // a map back from SDValues to numbers isn't worth it.
23877 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23878 if (Idx == -1) { // A new source vector?
23879 Idx = VecIn.size();
23880 VecIn.push_back(ExtractedFromVec);
23881 }
23882
23883 VectorMask[i] = Idx;
23884 }
23885
23886 // If we didn't find at least one input vector, bail out.
23887 if (VecIn.size() < 2)
23888 return SDValue();
23889
23890 // If all the operands of the BUILD_VECTOR extract from the same
23891 // vector, then split that vector efficiently based on the maximum
23892 // vector access index and adjust the VectorMask and
23893 // VecIn accordingly.
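// For example, a v4i8 build_vector whose operands all extract from one v32i8
// source with a maximum index of 12 is rewritten to extract two v8i8
// subvectors (elements [0,8) and [8,16)) and shuffle those instead, assuming
// v8i8 is a legal type.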
23894 bool DidSplitVec = false;
23895 if (VecIn.size() == 2) {
23896 // If we only found a single constant indexed extract_vector_elt feeding the
23897 // build_vector, do not produce a more complicated shuffle if the extract is
23898 // cheap with other constant/undef elements. Skip broadcast patterns with
23899 // multiple uses in the build_vector.
23900
23901 // TODO: This should be more aggressive about skipping the shuffle
23902 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
23903 // index.
23904 if (NumExtracts == 1 &&
23907 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23908 return SDValue();
23909
23910 unsigned MaxIndex = 0;
23911 unsigned NearestPow2 = 0;
23912 SDValue Vec = VecIn.back();
23913 EVT InVT = Vec.getValueType();
23914 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23915
23916 for (unsigned i = 0; i < NumElems; i++) {
23917 if (VectorMask[i] <= 0)
23918 continue;
23919 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23920 IndexVec[i] = Index;
23921 MaxIndex = std::max(MaxIndex, Index);
23922 }
23923
23924 NearestPow2 = PowerOf2Ceil(MaxIndex);
23925 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23926 NumElems * 2 < NearestPow2) {
23927 unsigned SplitSize = NearestPow2 / 2;
23928 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23929 InVT.getVectorElementType(), SplitSize);
23930 if (TLI.isTypeLegal(SplitVT) &&
23931 SplitSize + SplitVT.getVectorNumElements() <=
23932 InVT.getVectorNumElements()) {
23933 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23934 DAG.getVectorIdxConstant(SplitSize, DL));
23935 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23936 DAG.getVectorIdxConstant(0, DL));
23937 VecIn.pop_back();
23938 VecIn.push_back(VecIn1);
23939 VecIn.push_back(VecIn2);
23940 DidSplitVec = true;
23941
23942 for (unsigned i = 0; i < NumElems; i++) {
23943 if (VectorMask[i] <= 0)
23944 continue;
23945 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23946 }
23947 }
23948 }
23949 }
23950
23951 // Sort input vectors by decreasing vector element count,
23952 // while preserving the relative order of equally-sized vectors.
23953 // Note that we keep the first "implicit" zero vector as-is.
23954 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23955 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23956 [](const SDValue &a, const SDValue &b) {
23957 return a.getValueType().getVectorNumElements() >
23958 b.getValueType().getVectorNumElements();
23959 });
23960
23961 // We now also need to rebuild the VectorMask, because it referenced element
23962 // order in VecIn, and we just sorted them.
23963 for (int &SourceVectorIndex : VectorMask) {
23964 if (SourceVectorIndex <= 0)
23965 continue;
23966 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23967 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23968 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23969 SourceVectorIndex = Idx;
23970 }
23971
23972 VecIn = std::move(SortedVecIn);
23973
23974 // TODO: Should this fire if some of the input vectors have illegal types (like
23975 // it does now), or should we let legalization run its course first?
23976
23977 // Shuffle phase:
23978 // Take pairs of vectors, and shuffle them so that the result has elements
23979 // from these vectors in the correct places.
23980 // For example, given:
23981 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23982 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23983 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23984 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23985 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23986 // We will generate:
23987 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23988 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23989 SmallVector<SDValue, 4> Shuffles;
23990 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23991 unsigned LeftIdx = 2 * In + 1;
23992 SDValue VecLeft = VecIn[LeftIdx];
23993 SDValue VecRight =
23994 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23995
23996 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23997 VecRight, LeftIdx, DidSplitVec))
23998 Shuffles.push_back(Shuffle);
23999 else
24000 return SDValue();
24001 }
24002
24003 // If we need the zero vector as an "ingredient" in the blend tree, add it
24004 // to the list of shuffles.
24005 if (UsesZeroVector)
24006 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24007 : DAG.getConstantFP(0.0, DL, VT));
24008
24009 // If we only have one shuffle, we're done.
24010 if (Shuffles.size() == 1)
24011 return Shuffles[0];
24012
24013 // Update the vector mask to point to the post-shuffle vectors.
24014 for (int &Vec : VectorMask)
24015 if (Vec == 0)
24016 Vec = Shuffles.size() - 1;
24017 else
24018 Vec = (Vec - 1) / 2;
24019
24020 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24021 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24022 // generate:
24023 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24024 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24025 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24026 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24027 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24028 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24029 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24030
24031 // Make sure the initial size of the shuffle list is even.
24032 if (Shuffles.size() % 2)
24033 Shuffles.push_back(DAG.getUNDEF(VT));
24034
24035 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24036 if (CurSize % 2) {
24037 Shuffles[CurSize] = DAG.getUNDEF(VT);
24038 CurSize++;
24039 }
24040 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24041 int Left = 2 * In;
24042 int Right = 2 * In + 1;
24043 SmallVector<int, 8> Mask(NumElems, -1);
24044 SDValue L = Shuffles[Left];
24045 ArrayRef<int> LMask;
24046 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24047 L.use_empty() && L.getOperand(1).isUndef() &&
24048 L.getOperand(0).getValueType() == L.getValueType();
24049 if (IsLeftShuffle) {
24050 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24051 L = L.getOperand(0);
24052 }
24053 SDValue R = Shuffles[Right];
24054 ArrayRef<int> RMask;
24055 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24056 R.use_empty() && R.getOperand(1).isUndef() &&
24057 R.getOperand(0).getValueType() == R.getValueType();
24058 if (IsRightShuffle) {
24059 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24060 R = R.getOperand(0);
24061 }
24062 for (unsigned I = 0; I != NumElems; ++I) {
24063 if (VectorMask[I] == Left) {
24064 Mask[I] = I;
24065 if (IsLeftShuffle)
24066 Mask[I] = LMask[I];
24067 VectorMask[I] = In;
24068 } else if (VectorMask[I] == Right) {
24069 Mask[I] = I + NumElems;
24070 if (IsRightShuffle)
24071 Mask[I] = RMask[I] + NumElems;
24072 VectorMask[I] = In;
24073 }
24074 }
24075
24076 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24077 }
24078 }
24079 return Shuffles[0];
24080}
24081
24082// Try to turn a build vector of zero extends of extract vector elts into a
24083 // vector zero extend and possibly an extract subvector.
24084// TODO: Support sign extend?
24085// TODO: Allow undef elements?
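// For example:
//   (v4i32 build_vector (zext (extractelt (v8i16 X), 4)), ...,
//                       (zext (extractelt (v8i16 X), 7)))
//   --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))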
24086SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24087 if (LegalOperations)
24088 return SDValue();
24089
24090 EVT VT = N->getValueType(0);
24091
24092 bool FoundZeroExtend = false;
24093 SDValue Op0 = N->getOperand(0);
24094 auto checkElem = [&](SDValue Op) -> int64_t {
24095 unsigned Opc = Op.getOpcode();
24096 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24097 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24098 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24099 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24100 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24101 return C->getZExtValue();
24102 return -1;
24103 };
24104
24105 // Make sure the first element matches
24106 // (zext (extract_vector_elt X, C))
24107 // Offset must be a constant multiple of the
24108 // known-minimum vector length of the result type.
24109 int64_t Offset = checkElem(Op0);
24110 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24111 return SDValue();
24112
24113 unsigned NumElems = N->getNumOperands();
24114 SDValue In = Op0.getOperand(0).getOperand(0);
24115 EVT InSVT = In.getValueType().getScalarType();
24116 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24117
24118 // Don't create an illegal input type after type legalization.
24119 if (LegalTypes && !TLI.isTypeLegal(InVT))
24120 return SDValue();
24121
24122 // Ensure all the elements come from the same vector and are adjacent.
24123 for (unsigned i = 1; i != NumElems; ++i) {
24124 if ((Offset + i) != checkElem(N->getOperand(i)))
24125 return SDValue();
24126 }
24127
24128 SDLoc DL(N);
24129 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24130 Op0.getOperand(0).getOperand(1));
24131 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24132 VT, In);
24133}
24134
24135 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
24136 // and all other elements are constant zeros, granularize the BUILD_VECTOR's
24137 // element width, absorbing the ZERO_EXTEND and turning it into a constant zero op.
24138 // This pattern can appear during legalization.
24139//
24140// NOTE: This can be generalized to allow more than a single
24141 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
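// For example (little-endian, assuming the narrower types are legal):
//   (v2i64 build_vector (i64 zero_extend (i32 X)), (i64 0))
//   --> (v2i64 bitcast (v4i32 build_vector X, 0, 0, 0))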
24142SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24143 // Don't run this after legalization. Targets may have other preferences.
24144 if (Level >= AfterLegalizeDAG)
24145 return SDValue();
24146
24147 // FIXME: support big-endian.
24148 if (DAG.getDataLayout().isBigEndian())
24149 return SDValue();
24150
24151 EVT VT = N->getValueType(0);
24152 EVT OpVT = N->getOperand(0).getValueType();
24153 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24154
24155 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24156
24157 if (!TLI.isTypeLegal(OpIntVT) ||
24158 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24159 return SDValue();
24160
24161 unsigned EltBitwidth = VT.getScalarSizeInBits();
24162 // NOTE: the actual width of operands may be wider than that!
24163
24164 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24165 // active bits they all have? We'll want to truncate them all to that width.
24166 unsigned ActiveBits = 0;
24167 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24168 for (auto I : enumerate(N->ops())) {
24169 SDValue Op = I.value();
24170 // FIXME: support UNDEF elements?
24171 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24172 unsigned OpActiveBits =
24173 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24174 if (OpActiveBits == 0) {
24175 KnownZeroOps.setBit(I.index());
24176 continue;
24177 }
24178 // Profitability check: don't allow non-zero constant operands.
24179 return SDValue();
24180 }
24181 // Profitability check: there must only be a single non-zero operand,
24182 // and it must be the first operand of the BUILD_VECTOR.
24183 if (I.index() != 0)
24184 return SDValue();
24185 // The operand must be a zero-extension itself.
24186 // FIXME: this could be generalized to known leading zeros check.
24187 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24188 return SDValue();
24189 unsigned CurrActiveBits =
24190 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24191 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24192 ActiveBits = CurrActiveBits;
24193 // We want to at least halve the element size.
24194 if (2 * ActiveBits > EltBitwidth)
24195 return SDValue();
24196 }
24197
24198 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24199 if (ActiveBits == 0)
24200 return SDValue();
24201
24202 // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
24203 // into how many chunks can we split our element width?
24204 EVT NewScalarIntVT, NewIntVT;
24205 std::optional<unsigned> Factor;
24206 // We can split the element into at least two chunks, but not into more
24207 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
24208 // such that the element width is a multiple of it,
24209 // and the resulting types/operations on that chunk width are legal.
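// For example, with EltBitwidth = 64 and ActiveBits = 20 the candidate factors
// are 3 and 2; 64 is not a multiple of 3, so we end up with Scale = 2 and
// 32-bit chunks (provided i32 and the operations on it are legal).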
24210 assert(2 * ActiveBits <= EltBitwidth &&
24211 "We know that half or less bits of the element are active.");
24212 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24213 if (EltBitwidth % Scale != 0)
24214 continue;
24215 unsigned ChunkBitwidth = EltBitwidth / Scale;
24216 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24217 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24218 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24219 Scale * N->getNumOperands());
24220 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24221 (LegalOperations &&
24222 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24223 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24224 continue;
24225 Factor = Scale;
24226 break;
24227 }
24228 if (!Factor)
24229 return SDValue();
24230
24231 SDLoc DL(N);
24232 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24233
24234 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24235 SmallVector<SDValue, 16> NewOps;
24236 NewOps.reserve(NewIntVT.getVectorNumElements());
24237 for (auto I : enumerate(N->ops())) {
24238 SDValue Op = I.value();
24239 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24240 unsigned SrcOpIdx = I.index();
24241 if (KnownZeroOps[SrcOpIdx]) {
24242 NewOps.append(*Factor, ZeroOp);
24243 continue;
24244 }
24245 Op = DAG.getBitcast(OpIntVT, Op);
24246 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24247 NewOps.emplace_back(Op);
24248 NewOps.append(*Factor - 1, ZeroOp);
24249 }
24250 assert(NewOps.size() == NewIntVT.getVectorNumElements());
24251 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24252 NewBV = DAG.getBitcast(VT, NewBV);
24253 return NewBV;
24254}
24255
24256SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24257 EVT VT = N->getValueType(0);
24258
24259 // A vector built entirely of undefs is undef.
24260 if (ISD::allOperandsUndef(N))
24261 return DAG.getUNDEF(VT);
24262
24263 // If this is a splat of a bitcast from another vector, change to a
24264 // concat_vector.
24265 // For example:
24266 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24267 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
24268 //
24269 // If X is a build_vector itself, the concat can become a larger build_vector.
24270 // TODO: Maybe this is useful for non-splat too?
24271 if (!LegalOperations) {
24272 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24273 // Only change build_vector to a concat_vector if the splat value type is
24274 // same as the vector element type.
24275 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
24276 Splat = peekThroughBitcasts(Splat);
24277 EVT SrcVT = Splat.getValueType();
24278 if (SrcVT.isVector()) {
24279 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
24280 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
24281 SrcVT.getVectorElementType(), NumElts);
24282 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
24283 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
24284 SDValue Concat =
24285 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
24286 return DAG.getBitcast(VT, Concat);
24287 }
24288 }
24289 }
24290 }
24291
24292 // Check if we can express this BUILD_VECTOR via a subvector extract.
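// For example:
//   (v2i32 build_vector (extractelt (v4i32 X), 2), (extractelt (v4i32 X), 3))
//   --> (v2i32 extract_subvector (v4i32 X), 2)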
24293 if (!LegalTypes && (N->getNumOperands() > 1)) {
24294 SDValue Op0 = N->getOperand(0);
24295 auto checkElem = [&](SDValue Op) -> uint64_t {
24296 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
24297 (Op0.getOperand(0) == Op.getOperand(0)))
24298 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
24299 return CNode->getZExtValue();
24300 return -1;
24301 };
24302
24303 int Offset = checkElem(Op0);
24304 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
24305 if (Offset + i != checkElem(N->getOperand(i))) {
24306 Offset = -1;
24307 break;
24308 }
24309 }
24310
24311 if ((Offset == 0) &&
24312 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
24313 return Op0.getOperand(0);
24314 if ((Offset != -1) &&
24315 ((Offset % N->getValueType(0).getVectorNumElements()) ==
24316 0)) // IDX must be multiple of output size.
24317 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
24318 Op0.getOperand(0), Op0.getOperand(1));
24319 }
24320
24321 if (SDValue V = convertBuildVecZextToZext(N))
24322 return V;
24323
24324 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
24325 return V;
24326
24327 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
24328 return V;
24329
24330 if (SDValue V = reduceBuildVecTruncToBitCast(N))
24331 return V;
24332
24333 if (SDValue V = reduceBuildVecToShuffle(N))
24334 return V;
24335
24336 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
24337 // Do this late as some of the above may replace the splat.
24338 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
24339 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
24340 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
24341 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
24342 }
24343
24344 return SDValue();
24345}
24346
24347static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
24348 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24349 EVT OpVT = N->getOperand(0).getValueType();
24350
24351 // If the operands are legal vectors, leave them alone.
24352 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
24353 return SDValue();
24354
24355 SDLoc DL(N);
24356 EVT VT = N->getValueType(0);
24357 SmallVector<SDValue, 8> Ops;
24358 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24359
24360 // Keep track of what we encounter.
24361 EVT AnyFPVT;
24362
24363 for (const SDValue &Op : N->ops()) {
24364 if (ISD::BITCAST == Op.getOpcode() &&
24365 !Op.getOperand(0).getValueType().isVector())
24366 Ops.push_back(Op.getOperand(0));
24367 else if (ISD::UNDEF == Op.getOpcode())
24368 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
24369 else
24370 return SDValue();
24371
24372 // Note whether we encounter an integer or floating point scalar.
24373 // If it's neither, bail out, it could be something weird like x86mmx.
24374 EVT LastOpVT = Ops.back().getValueType();
24375 if (LastOpVT.isFloatingPoint())
24376 AnyFPVT = LastOpVT;
24377 else if (!LastOpVT.isInteger())
24378 return SDValue();
24379 }
24380
24381 // If any of the operands is a floating point scalar bitcast to a vector,
24382 // use floating point types throughout, and bitcast everything.
24383 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
24384 if (AnyFPVT != EVT()) {
24385 SVT = AnyFPVT;
24386 for (SDValue &Op : Ops) {
24387 if (Op.getValueType() == SVT)
24388 continue;
24389 if (Op.isUndef())
24390 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
24391 else
24392 Op = DAG.getBitcast(SVT, Op);
24393 }
24394 }
24395
24396 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
24397 VT.getSizeInBits() / SVT.getSizeInBits());
24398 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
24399}
24400
24401// Attempt to merge nested concat_vectors/undefs.
24402// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
24403// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
24404static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
24405 SelectionDAG &DAG) {
24406 EVT VT = N->getValueType(0);
24407
24408 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
24409 EVT SubVT;
24410 SDValue FirstConcat;
24411 for (const SDValue &Op : N->ops()) {
24412 if (Op.isUndef())
24413 continue;
24414 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
24415 return SDValue();
24416 if (!FirstConcat) {
24417 SubVT = Op.getOperand(0).getValueType();
24418 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
24419 return SDValue();
24420 FirstConcat = Op;
24421 continue;
24422 }
24423 if (SubVT != Op.getOperand(0).getValueType())
24424 return SDValue();
24425 }
24426 assert(FirstConcat && "Concat of all-undefs found");
24427
24428 SmallVector<SDValue> ConcatOps;
24429 for (const SDValue &Op : N->ops()) {
24430 if (Op.isUndef()) {
24431 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
24432 continue;
24433 }
24434 ConcatOps.append(Op->op_begin(), Op->op_end());
24435 }
24436 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
24437}
24438
24439// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
24440// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
24441// most two distinct vectors the same size as the result, attempt to turn this
24442// into a legal shuffle.
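// For example (with v2i32 extracts from v4i32 sources X and Y):
//   (v4i32 concat_vectors (extract_subvector X, 2), (extract_subvector Y, 0))
//   --> (v4i32 vector_shuffle<2,3,4,5> X, Y)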
24443static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
24444 EVT VT = N->getValueType(0);
24445 EVT OpVT = N->getOperand(0).getValueType();
24446
24447 // We currently can't generate an appropriate shuffle for a scalable vector.
24448 if (VT.isScalableVector())
24449 return SDValue();
24450
24451 int NumElts = VT.getVectorNumElements();
24452 int NumOpElts = OpVT.getVectorNumElements();
24453
24454 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
24455 SmallVector<int, 8> Mask;
24456
24457 for (SDValue Op : N->ops()) {
24458 Op = peekThroughBitcasts(Op);
24459
24460 // UNDEF nodes convert to UNDEF shuffle mask values.
24461 if (Op.isUndef()) {
24462 Mask.append((unsigned)NumOpElts, -1);
24463 continue;
24464 }
24465
24466 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24467 return SDValue();
24468
24469 // What vector are we extracting the subvector from and at what index?
24470 SDValue ExtVec = Op.getOperand(0);
24471 int ExtIdx = Op.getConstantOperandVal(1);
24472
24473 // We want the EVT of the original extraction to correctly scale the
24474 // extraction index.
24475 EVT ExtVT = ExtVec.getValueType();
24476 ExtVec = peekThroughBitcasts(ExtVec);
24477
24478 // UNDEF nodes convert to UNDEF shuffle mask values.
24479 if (ExtVec.isUndef()) {
24480 Mask.append((unsigned)NumOpElts, -1);
24481 continue;
24482 }
24483
24484 // Ensure that we are extracting a subvector from a vector the same
24485 // size as the result.
24486 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
24487 return SDValue();
24488
24489 // Scale the subvector index to account for any bitcast.
24490 int NumExtElts = ExtVT.getVectorNumElements();
24491 if (0 == (NumExtElts % NumElts))
24492 ExtIdx /= (NumExtElts / NumElts);
24493 else if (0 == (NumElts % NumExtElts))
24494 ExtIdx *= (NumElts / NumExtElts);
24495 else
24496 return SDValue();
24497
24498 // At most we can reference 2 inputs in the final shuffle.
24499 if (SV0.isUndef() || SV0 == ExtVec) {
24500 SV0 = ExtVec;
24501 for (int i = 0; i != NumOpElts; ++i)
24502 Mask.push_back(i + ExtIdx);
24503 } else if (SV1.isUndef() || SV1 == ExtVec) {
24504 SV1 = ExtVec;
24505 for (int i = 0; i != NumOpElts; ++i)
24506 Mask.push_back(i + ExtIdx + NumElts);
24507 } else {
24508 return SDValue();
24509 }
24510 }
24511
24512 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24513 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
24514 DAG.getBitcast(VT, SV1), Mask, DAG);
24515}
24516
24517static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
24518 unsigned CastOpcode = N->getOperand(0).getOpcode();
24519 switch (CastOpcode) {
24520 case ISD::SINT_TO_FP:
24521 case ISD::UINT_TO_FP:
24522 case ISD::FP_TO_SINT:
24523 case ISD::FP_TO_UINT:
24524 // TODO: Allow more opcodes?
24525 // case ISD::BITCAST:
24526 // case ISD::TRUNCATE:
24527 // case ISD::ZERO_EXTEND:
24528 // case ISD::SIGN_EXTEND:
24529 // case ISD::FP_EXTEND:
24530 break;
24531 default:
24532 return SDValue();
24533 }
24534
24535 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
24536 if (!SrcVT.isVector())
24537 return SDValue();
24538
24539 // All operands of the concat must be the same kind of cast from the same
24540 // source type.
24541 SmallVector<SDValue, 4> SrcOps;
24542 for (SDValue Op : N->ops()) {
24543 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
24544 Op.getOperand(0).getValueType() != SrcVT)
24545 return SDValue();
24546 SrcOps.push_back(Op.getOperand(0));
24547 }
24548
24549 // The wider cast must be supported by the target. This is unusual because
24550 // the operation support type parameter depends on the opcode. In addition,
24551 // check the other type in the cast to make sure this is really legal.
24552 EVT VT = N->getValueType(0);
24553 EVT SrcEltVT = SrcVT.getVectorElementType();
24554 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24555 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24556 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24557 switch (CastOpcode) {
24558 case ISD::SINT_TO_FP:
24559 case ISD::UINT_TO_FP:
24560 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24561 !TLI.isTypeLegal(VT))
24562 return SDValue();
24563 break;
24564 case ISD::FP_TO_SINT:
24565 case ISD::FP_TO_UINT:
24566 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24567 !TLI.isTypeLegal(ConcatSrcVT))
24568 return SDValue();
24569 break;
24570 default:
24571 llvm_unreachable("Unexpected cast opcode");
24572 }
24573
24574 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24575 SDLoc DL(N);
24576 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24577 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24578}
24579
24580// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24581// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24582// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
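// For example (with v2i32 operands and a legal v4i32 shuffle mask):
//   (v4i32 concat_vectors (vector_shuffle<1,0> A, undef), A)
//   --> (v4i32 vector_shuffle<1,0,0,1> (concat_vectors A, undef), undef)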
24583static SDValue combineConcatVectorOfShuffleAndItsOperands(
24584 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24585 bool LegalOperations) {
24586 EVT VT = N->getValueType(0);
24587 EVT OpVT = N->getOperand(0).getValueType();
24588 if (VT.isScalableVector())
24589 return SDValue();
24590
24591 // For now, only allow simple 2-operand concatenations.
24592 if (N->getNumOperands() != 2)
24593 return SDValue();
24594
24595 // Don't create illegal types/shuffles when not allowed to.
24596 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24597 (LegalOperations &&
24598 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24599 return SDValue();
24600
24601 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24602 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24603 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24604 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24605 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24606 ShuffleVectorSDNode *SVN = nullptr;
24607 for (SDValue Op : N->ops()) {
24608 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24609 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24610 all_of(N->ops(), [CurSVN](SDValue Op) {
24611 // FIXME: can we allow UNDEF operands?
24612 return !Op.isUndef() &&
24613 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24614 })) {
24615 SVN = CurSVN;
24616 break;
24617 }
24618 }
24619 if (!SVN)
24620 return SDValue();
24621
24622 // We are going to pad the shuffle operands, so any index that was picking
24623 // from the second operand must be adjusted.
24624 SmallVector<int, 16> AdjustedMask;
24625 AdjustedMask.reserve(SVN->getMask().size());
24626 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24627 append_range(AdjustedMask, SVN->getMask());
24628
24629 // Identity masks for the operands of the (padded) shuffle.
24630 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24631 MutableArrayRef<int> FirstShufOpIdentityMask =
24632 MutableArrayRef<int>(IdentityMask)
24633 .take_front(OpVT.getVectorNumElements());
24634 MutableArrayRef<int> SecondShufOpIdentityMask =
24635 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24636 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24637 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24638 OpVT.getVectorNumElements());
24639
24640 // New combined shuffle mask.
24641 SmallVector<int, 32> Mask;
24642 Mask.reserve(VT.getVectorNumElements());
24643 for (SDValue Op : N->ops()) {
24644 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24645 if (Op.getNode() == SVN) {
24646 append_range(Mask, AdjustedMask);
24647 continue;
24648 }
24649 if (Op == SVN->getOperand(0)) {
24650 append_range(Mask, FirstShufOpIdentityMask);
24651 continue;
24652 }
24653 if (Op == SVN->getOperand(1)) {
24654 append_range(Mask, SecondShufOpIdentityMask);
24655 continue;
24656 }
24657 llvm_unreachable("Unexpected operand!");
24658 }
24659
24660 // Don't create illegal shuffle masks.
24661 if (!TLI.isShuffleMaskLegal(Mask, VT))
24662 return SDValue();
24663
24664 // Pad the shuffle operands with UNDEF.
24665 SDLoc dl(N);
24666 std::array<SDValue, 2> ShufOps;
24667 for (auto I : zip(SVN->ops(), ShufOps)) {
24668 SDValue ShufOp = std::get<0>(I);
24669 SDValue &NewShufOp = std::get<1>(I);
24670 if (ShufOp.isUndef())
24671 NewShufOp = DAG.getUNDEF(VT);
24672 else {
24673 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24674 DAG.getUNDEF(OpVT));
24675 ShufOpParts[0] = ShufOp;
24676 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24677 }
24678 }
24679 // Finally, create the new wide shuffle.
24680 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24681}
24682
24683SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24684 // If we only have one input vector, we don't need to do any concatenation.
24685 if (N->getNumOperands() == 1)
24686 return N->getOperand(0);
24687
24688 // Check if all of the operands are undefs.
24689 EVT VT = N->getValueType(0);
24690 if (ISD::allOperandsUndef(N))
24691 return DAG.getUNDEF(VT);
24692
24693 // Optimize concat_vectors where all but the first of the vectors are undef.
24694 if (all_of(drop_begin(N->ops()),
24695 [](const SDValue &Op) { return Op.isUndef(); })) {
24696 SDValue In = N->getOperand(0);
24697 assert(In.getValueType().isVector() && "Must concat vectors");
24698
24699 // If the input is a concat_vectors, just make a larger concat by padding
24700 // with smaller undefs.
24701 //
24702 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24703 // here could cause an infinite loop. That legalizing happens when LegalDAG
24704 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24705 // scalable.
24706 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24707 !(LegalDAG && In.getValueType().isScalableVector())) {
24708 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24709 SmallVector<SDValue, 4> Ops(In->ops());
24710 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24711 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24712 }
24713
24714 SDValue Scalar = peekThroughOneUseBitcasts(In);
24715
24716 // concat_vectors(scalar_to_vector(scalar), undef) ->
24717 // scalar_to_vector(scalar)
24718 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24719 Scalar.hasOneUse()) {
24720 EVT SVT = Scalar.getValueType().getVectorElementType();
24721 if (SVT == Scalar.getOperand(0).getValueType())
24722 Scalar = Scalar.getOperand(0);
24723 }
24724
24725 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24726 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24727 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24728 // look through the trunc so we can still do the transform:
24729 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24730 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24731 !TLI.isTypeLegal(Scalar.getValueType()) &&
24732 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24733 Scalar = Scalar->getOperand(0);
24734
24735 EVT SclTy = Scalar.getValueType();
24736
24737 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24738 return SDValue();
24739
24740 // Bail out if the vector size is not a multiple of the scalar size.
24741 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24742 return SDValue();
24743
24744 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24745 if (VNTNumElms < 2)
24746 return SDValue();
24747
24748 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24749 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24750 return SDValue();
24751
24752 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24753 return DAG.getBitcast(VT, Res);
24754 }
24755 }
24756
24757 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24758 // We have already tested above for an UNDEF only concatenation.
24759 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24760 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24761 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24762 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24763 };
24764 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24765 SmallVector<SDValue, 8> Opnds;
24766 EVT SVT = VT.getScalarType();
24767
24768 EVT MinVT = SVT;
24769 if (!SVT.isFloatingPoint()) {
24770 // If the BUILD_VECTOR nodes are built from integers, they may have different
24771 // operand types. Get the smallest type and truncate all operands to it.
24772 bool FoundMinVT = false;
24773 for (const SDValue &Op : N->ops())
24774 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24775 EVT OpSVT = Op.getOperand(0).getValueType();
24776 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24777 FoundMinVT = true;
24778 }
24779 assert(FoundMinVT && "Concat vector type mismatch");
24780 }
24781
24782 for (const SDValue &Op : N->ops()) {
24783 EVT OpVT = Op.getValueType();
24784 unsigned NumElts = OpVT.getVectorNumElements();
24785
24786 if (ISD::UNDEF == Op.getOpcode())
24787 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24788
24789 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24790 if (SVT.isFloatingPoint()) {
24791 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24792 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24793 } else {
24794 for (unsigned i = 0; i != NumElts; ++i)
24795 Opnds.push_back(
24796 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24797 }
24798 }
24799 }
24800
24801 assert(VT.getVectorNumElements() == Opnds.size() &&
24802 "Concat vector type mismatch");
24803 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24804 }
24805
24806 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24807 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24808 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24809 return V;
24810
24811 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24812 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24813 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24814 return V;
24815
24816 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24817 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24818 return V;
24819 }
24820
24821 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24822 return V;
24823
24824 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24825 N, DAG, TLI, LegalTypes, LegalOperations))
24826 return V;
24827
24828 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24829 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24830 // operands and look for CONCAT operations that place the incoming vectors
24831 // at the exact same location.
24832 //
24833 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
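// For example:
//   (v8i32 concat_vectors (v4i32 extract_subvector (v8i32 X), 0),
//                         (v4i32 extract_subvector (v8i32 X), 4))
//   --> (v8i32 X)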
24834 SDValue SingleSource = SDValue();
24835 unsigned PartNumElem =
24836 N->getOperand(0).getValueType().getVectorMinNumElements();
24837
24838 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24839 SDValue Op = N->getOperand(i);
24840
24841 if (Op.isUndef())
24842 continue;
24843
24844 // Check if this is the identity extract:
24845 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24846 return SDValue();
24847
24848 // Find the single incoming vector for the extract_subvector.
24849 if (SingleSource.getNode()) {
24850 if (Op.getOperand(0) != SingleSource)
24851 return SDValue();
24852 } else {
24853 SingleSource = Op.getOperand(0);
24854
24855 // Check the source type is the same as the type of the result.
24856 // If not, this concat may extend the vector, so we can not
24857 // optimize it away.
24858 if (SingleSource.getValueType() != N->getValueType(0))
24859 return SDValue();
24860 }
24861
24862 // Check that we are reading from the identity index.
24863 unsigned IdentityIndex = i * PartNumElem;
24864 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24865 return SDValue();
24866 }
24867
24868 if (SingleSource.getNode())
24869 return SingleSource;
24870
24871 return SDValue();
24872}
24873
24874// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24875// if the subvector can be sourced for free.
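// For example, getSubVectorSrc((insert_subvector ?, X, Index), Index, VT-of-X)
// returns X, and getSubVectorSrc((concat_vectors A, B), NumElts(A), VT-of-A)
// returns B.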
24876static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24877 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24878 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24879 return V.getOperand(1);
24880 }
24881 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24882 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24883 V.getOperand(0).getValueType() == SubVT &&
24884 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24885 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24886 return V.getOperand(SubIdx);
24887 }
24888 return SDValue();
24889}
24890
24891static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24892 SelectionDAG &DAG,
24893 bool LegalOperations) {
24894 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24895 SDValue BinOp = Extract->getOperand(0);
24896 unsigned BinOpcode = BinOp.getOpcode();
24897 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24898 return SDValue();
24899
24900 EVT VecVT = BinOp.getValueType();
24901 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24902 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24903 return SDValue();
24904
24905 SDValue Index = Extract->getOperand(1);
24906 EVT SubVT = Extract->getValueType(0);
24907 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24908 return SDValue();
24909
24910 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24911 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24912
24913 // TODO: We could handle the case where only 1 operand is being inserted by
24914 // creating an extract of the other operand, but that requires checking
24915 // number of uses and/or costs.
24916 if (!Sub0 || !Sub1)
24917 return SDValue();
24918
24919 // We are inserting both operands of the wide binop only to extract back
24920 // to the narrow vector size. Eliminate all of the insert/extract:
24921 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24922 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24923 BinOp->getFlags());
24924}
24925
24926/// If we are extracting a subvector produced by a wide binary operator try
24927/// to use a narrow binary operator and/or avoid concatenation and extraction.
24928static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24929 bool LegalOperations) {
24930 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24931 // some of these bailouts with other transforms.
24932
24933 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24934 return V;
24935
24936 // The extract index must be a constant, so we can map it to a concat operand.
24937 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24938 if (!ExtractIndexC)
24939 return SDValue();
24940
24941 // We are looking for an optionally bitcasted wide vector binary operator
24942 // feeding an extract subvector.
24943 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24944 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24945 unsigned BOpcode = BinOp.getOpcode();
24946 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24947 return SDValue();
24948
24949 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24950 // reduced to the unary fneg when it is visited, and we probably want to deal
24951 // with fneg in a target-specific way.
24952 if (BOpcode == ISD::FSUB) {
24953 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24954 if (C && C->getValueAPF().isNegZero())
24955 return SDValue();
24956 }
24957
24958 // The binop must be a vector type, so we can extract some fraction of it.
24959 EVT WideBVT = BinOp.getValueType();
24960 // The optimisations below currently assume we are dealing with fixed length
24961 // vectors. It is possible to add support for scalable vectors, but at the
24962 // moment we've done no analysis to prove whether they are profitable or not.
24963 if (!WideBVT.isFixedLengthVector())
24964 return SDValue();
24965
24966 EVT VT = Extract->getValueType(0);
24967 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24968 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24969 "Extract index is not a multiple of the vector length.");
24970
24971 // Bail out if this is not a proper multiple width extraction.
24972 unsigned WideWidth = WideBVT.getSizeInBits();
24973 unsigned NarrowWidth = VT.getSizeInBits();
24974 if (WideWidth % NarrowWidth != 0)
24975 return SDValue();
24976
24977 // Bail out if we are extracting a fraction of a single operation. This can
24978 // occur because we potentially looked through a bitcast of the binop.
24979 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24980 unsigned WideNumElts = WideBVT.getVectorNumElements();
24981 if (WideNumElts % NarrowingRatio != 0)
24982 return SDValue();
24983
24984 // Bail out if the target does not support a narrower version of the binop.
24985 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24986 WideNumElts / NarrowingRatio);
24987 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24988 LegalOperations))
24989 return SDValue();
24990
24991 // If extraction is cheap, we don't need to look at the binop operands
24992 // for concat ops. The narrow binop alone makes this transform profitable.
24993 // We can't just reuse the original extract index operand because we may have
24994 // bitcasted.
24995 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24996 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24997 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24998 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24999 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25000 SDLoc DL(Extract);
25001 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25002 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25003 BinOp.getOperand(0), NewExtIndex);
25004 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25005 BinOp.getOperand(1), NewExtIndex);
25006 SDValue NarrowBinOp =
25007 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25008 return DAG.getBitcast(VT, NarrowBinOp);
25009 }
25010
25011 // Only handle the case where we are doubling and then halving. A larger ratio
25012 // may require more than two narrow binops to replace the wide binop.
25013 if (NarrowingRatio != 2)
25014 return SDValue();
25015
25016 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25017 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25018 // flavors, but no other 256-bit integer support. This could be extended to
25019 // handle any binop, but that may require fixing/adding other folds to avoid
25020 // codegen regressions.
25021 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25022 return SDValue();
25023
25024 // We need at least one concatenation operation of a binop operand to make
25025 // this transform worthwhile. The concat must double the input vector sizes.
25026 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25027 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25028 return V.getOperand(ConcatOpNum);
25029 return SDValue();
25030 };
25031 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25032 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25033
25034 if (SubVecL || SubVecR) {
25035 // If a binop operand was not the result of a concat, we must extract a
25036 // half-sized operand for our new narrow binop:
25037 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25038 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25039 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25040 SDLoc DL(Extract);
25041 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25042 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25043 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25044 BinOp.getOperand(0), IndexC);
25045
25046 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25047 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25048 BinOp.getOperand(1), IndexC);
25049
25050 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25051 return DAG.getBitcast(VT, NarrowBinOp);
25052 }
25053
25054 return SDValue();
25055}
25056
25057/// If we are extracting a subvector from a wide vector load, convert to a
25058/// narrow load to eliminate the extraction:
25059/// (extract_subvector (load wide vector)) --> (load narrow vector)
25060static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
25061 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25062 if (DAG.getDataLayout().isBigEndian())
25063 return SDValue();
25064
25065 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
25066 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
25067 return SDValue();
25068
25069 // Allow targets to opt-out.
25070 EVT VT = Extract->getValueType(0);
25071
25072 // We can only create byte sized loads.
25073 if (!VT.isByteSized())
25074 return SDValue();
25075
25076 unsigned Index = Extract->getConstantOperandVal(1);
25077 unsigned NumElts = VT.getVectorMinNumElements();
25078 // A fixed length vector being extracted from a scalable vector
25079 // may not be any *smaller* than the scalable one.
25080 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25081 return SDValue();
25082
25083 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25084 // multiple of the minimum number of elements in the result type.
25085 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25086 "multiple of the result's element count");
25087
25088 // It's fine to use TypeSize here as we know the offset will not be negative.
25089 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25090
25091 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25092 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
25093 return SDValue();
25094
25095 // The narrow load will be offset from the base address of the old load if
25096 // we are extracting from something besides index 0 (little-endian).
25097 SDLoc DL(Extract);
25098
25099 // TODO: Use "BaseIndexOffset" to make this more effective.
25100 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25101
25102 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
25103 MachineFunction &MF = DAG.getMachineFunction();
25104 MachineMemOperand *MMO;
25105 if (Offset.isScalable()) {
25106 MachinePointerInfo MPI =
25107 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25108 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
25109 } else
25110 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25111 StoreSize);
25112
25113 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25114 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25115 return NewLd;
25116}
25117
25118/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25119/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25120/// EXTRACT_SUBVECTOR(Op?, ?),
25121/// Mask'))
25122/// iff it is legal and profitable to do so. Notably, the trimmed mask
25123/// (containing only the elements that are extracted)
25124/// must reference at most two subvectors.
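/// For example (when the narrow shuffle and extractions are legal):
///   (v2f32 extract_subvector (vector_shuffle<1,4,u,u> (v4f32 A), (v4f32 B)), 0)
///   --> (v2f32 vector_shuffle<1,2> (extract_subvector A, 0),
///                                  (extract_subvector B, 0))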
25125static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
25126 SelectionDAG &DAG,
25127 const TargetLowering &TLI,
25128 bool LegalOperations) {
25129 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25130 "Must only be called on EXTRACT_SUBVECTOR's");
25131
25132 SDValue N0 = N->getOperand(0);
25133
25134 // Only deal with non-scalable vectors.
25135 EVT NarrowVT = N->getValueType(0);
25136 EVT WideVT = N0.getValueType();
25137 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25138 return SDValue();
25139
25140 // The operand must be a shufflevector.
25141 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
25142 if (!WideShuffleVector)
25143 return SDValue();
25144
25145 // The old shuffle needs to go away.
25146 if (!WideShuffleVector->hasOneUse())
25147 return SDValue();
25148
25149 // And the narrow shufflevector that we'll form must be legal.
25150 if (LegalOperations &&
25151 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25152 return SDValue();
25153
25154 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
25155 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25156 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
25157 "Extract index is not a multiple of the output vector length.");
25158
25159 int WideNumElts = WideVT.getVectorNumElements();
25160
25161 SmallVector<int, 16> NewMask;
25162 NewMask.reserve(NumEltsExtracted);
25163 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25164 DemandedSubvectors;
25165
25166 // Try to decode the wide mask into narrow mask from at most two subvectors.
25167 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
25168 NumEltsExtracted)) {
25169 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25170 "Out-of-bounds shuffle mask?");
25171
25172 if (M < 0) {
25173 // Does not depend on operands, does not require adjustment.
25174 NewMask.emplace_back(M);
25175 continue;
25176 }
25177
25178 // From which operand of the shuffle does this shuffle mask element pick?
25179 int WideShufOpIdx = M / WideNumElts;
25180 // Which element of that operand is picked?
25181 int OpEltIdx = M % WideNumElts;
25182
25183 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25184 "Shuffle mask vector decomposition failure.");
25185
25186 // And which NumEltsExtracted-sized subvector of that operand is that?
25187 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25188 // And which element within that subvector of that operand is that?
25189 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25190
25191 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25192 "Shuffle mask subvector decomposition failure.");
25193
25194 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25195 WideShufOpIdx * WideNumElts) == M &&
25196 "Shuffle mask full decomposition failure.");
25197
25198 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25199
25200 if (Op.isUndef()) {
25201 // Picking from an undef operand. Let's adjust mask instead.
25202 NewMask.emplace_back(-1);
25203 continue;
25204 }
25205
25206 const std::pair<SDValue, int> DemandedSubvector =
25207 std::make_pair(Op, OpSubvecIdx);
25208
25209 if (DemandedSubvectors.insert(DemandedSubvector)) {
25210 if (DemandedSubvectors.size() > 2)
25211 return SDValue(); // We can't handle more than two subvectors.
25212 // How many elements into the WideVT does this subvector start?
25213 int Index = NumEltsExtracted * OpSubvecIdx;
25214 // Bail out if the extraction isn't going to be cheap.
25215 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25216 return SDValue();
25217 }
25218
25219 // Ok, but from which operand of the new shuffle will this element pick?
25220 int NewOpIdx =
25221 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25222 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25223
25224 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25225 NewMask.emplace_back(AdjM);
25226 }
25227 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25228 assert(DemandedSubvectors.size() <= 2 &&
25229 "Should have ended up demanding at most two subvectors.");
25230
25231 // Did we discover that the shuffle does not actually depend on operands?
25232 if (DemandedSubvectors.empty())
25233 return DAG.getUNDEF(NarrowVT);
25234
25235 // Profitability check: only deal with extractions from the first subvector
25236 // unless the mask becomes an identity mask.
25237 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25238 any_of(NewMask, [](int M) { return M < 0; }))
25239 for (auto &DemandedSubvector : DemandedSubvectors)
25240 if (DemandedSubvector.second != 0)
25241 return SDValue();
25242
25243 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
25244 // operand[s]/index[es], so there is no point in checking for its legality.
25245
25246 // Do not turn a legal shuffle into an illegal one.
25247 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25248 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
25249 return SDValue();
25250
25251 SDLoc DL(N);
25252
25253 SmallVector<SDValue, 2> NewOps;
25254 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
25255 &DemandedSubvector : DemandedSubvectors) {
25256 // How many elements into the WideVT does this subvector start?
25257 int Index = NumEltsExtracted * DemandedSubvector.second;
25258 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
25259 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
25260 DemandedSubvector.first, IndexC));
25261 }
25262 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
25263 "Should end up with either one or two ops");
25264
25265 // If we ended up with only one operand, pad with an undef.
25266 if (NewOps.size() == 1)
25267 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
25268
25269 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
25270}
25271
25272SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
25273 EVT NVT = N->getValueType(0);
25274 SDValue V = N->getOperand(0);
25275 uint64_t ExtIdx = N->getConstantOperandVal(1);
25276 SDLoc DL(N);
25277
25278 // Extract from UNDEF is UNDEF.
25279 if (V.isUndef())
25280 return DAG.getUNDEF(NVT);
25281
25282 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
25283 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
25284 return NarrowLoad;
25285
25286 // Combine an extract of an extract into a single extract_subvector.
25287 // ext (ext X, C), 0 --> ext X, C
25288 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
25289 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
25290 V.getConstantOperandVal(1)) &&
25292 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
25293 V.getOperand(1));
25294 }
25295 }
25296
25297 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
25298 if (V.getOpcode() == ISD::SPLAT_VECTOR)
25299 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
25300 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
25301 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
25302
25303 // extract_subvector(insert_subvector(x,y,c1),c2)
25304 // --> extract_subvector(y,c2-c1)
25305 // iff we're just extracting from the inserted subvector.
25306 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25307 SDValue InsSub = V.getOperand(1);
25308 EVT InsSubVT = InsSub.getValueType();
25309 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
25310 unsigned InsIdx = V.getConstantOperandVal(2);
25311 unsigned NumSubElts = NVT.getVectorMinNumElements();
25312 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
25313 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
25314 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
25315 V.getValueType().isFixedLengthVector())
25316 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
25317 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
25318 }
25319
25320 // Try to move vector bitcast after extract_subv by scaling extraction index:
25321 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
25322 if (V.getOpcode() == ISD::BITCAST &&
25323 V.getOperand(0).getValueType().isVector() &&
25324 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
25325 SDValue SrcOp = V.getOperand(0);
25326 EVT SrcVT = SrcOp.getValueType();
25327 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
25328 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
25329 if ((SrcNumElts % DestNumElts) == 0) {
25330 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
25331 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
25332 EVT NewExtVT =
25333 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
25334 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25335 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
25336 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25337 V.getOperand(0), NewIndex);
25338 return DAG.getBitcast(NVT, NewExtract);
25339 }
25340 }
25341 if ((DestNumElts % SrcNumElts) == 0) {
25342 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
25343 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
25344 ElementCount NewExtEC =
25345 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
25346 EVT ScalarVT = SrcVT.getScalarType();
25347 if ((ExtIdx % DestSrcRatio) == 0) {
25348 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
25349 EVT NewExtVT =
25350 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
25351 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
25352 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25353 SDValue NewExtract =
25354 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
25355 V.getOperand(0), NewIndex);
25356 return DAG.getBitcast(NVT, NewExtract);
25357 }
25358 if (NewExtEC.isScalar() &&
25359 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
25360 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
25361 SDValue NewExtract =
25362 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
25363 V.getOperand(0), NewIndex);
25364 return DAG.getBitcast(NVT, NewExtract);
25365 }
25366 }
25367 }
25368 }
25369 }
25370
25371 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
25372 unsigned ExtNumElts = NVT.getVectorMinNumElements();
25373 EVT ConcatSrcVT = V.getOperand(0).getValueType();
25374 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
25375 "Concat and extract subvector do not change element type");
25376 assert((ExtIdx % ExtNumElts) == 0 &&
25377 "Extract index is not a multiple of the input vector length.");
25378
25379 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
25380 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
25381
25382 // If the concatenated source types match this extract, it's a direct
25383 // simplification:
25384 // extract_subvec (concat V1, V2, ...), i --> Vi
25385 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
25386 return V.getOperand(ConcatOpIdx);
25387
25388 // If the length of each concatenated source vector is a multiple of the extract length,
25389 // then extract a fraction of one of those source vectors directly from a
25390 // concat operand. Example:
25391 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
25392 // v2i8 extract_subvec v8i8 Y, 6
25393 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
25394 ConcatSrcNumElts % ExtNumElts == 0) {
25395 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
25396 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
25397 "Trying to extract from >1 concat operand?");
25398 assert(NewExtIdx % ExtNumElts == 0 &&
25399 "Extract index is not a multiple of the input vector length.");
25400 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
25401 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
25402 V.getOperand(ConcatOpIdx), NewIndexC);
25403 }
25404 }
25405
25406 if (SDValue V =
25407 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
25408 return V;
25409
25410 V = peekThroughBitcasts(V);
25411
25412 // If the input is a build vector, try to make a smaller build vector.
25413 if (V.getOpcode() == ISD::BUILD_VECTOR) {
25414 EVT InVT = V.getValueType();
25415 unsigned ExtractSize = NVT.getSizeInBits();
25416 unsigned EltSize = InVT.getScalarSizeInBits();
25417 // Only do this if we won't split any elements.
25418 if (ExtractSize % EltSize == 0) {
25419 unsigned NumElems = ExtractSize / EltSize;
25420 EVT EltVT = InVT.getVectorElementType();
25421 EVT ExtractVT =
25422 NumElems == 1 ? EltVT
25423 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
25424 if ((Level < AfterLegalizeDAG ||
25425 (NumElems == 1 ||
25426 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
25427 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
25428 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
25429
25430 if (NumElems == 1) {
25431 SDValue Src = V->getOperand(IdxVal);
25432 if (EltVT != Src.getValueType())
25433 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
25434 return DAG.getBitcast(NVT, Src);
25435 }
25436
25437 // Extract the pieces from the original build_vector.
25438 SDValue BuildVec =
25439 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
25440 return DAG.getBitcast(NVT, BuildVec);
25441 }
25442 }
25443 }
25444
25445 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
25446 // Handle only simple case where vector being inserted and vector
25447 // being extracted are of same size.
25448 EVT SmallVT = V.getOperand(1).getValueType();
25449 if (!NVT.bitsEq(SmallVT))
25450 return SDValue();
25451
25452 // Combine:
25453 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
25454 // Into:
25455 // indices are equal or bit offsets are equal => V1
25456 // otherwise => (extract_subvec V1, ExtIdx)
25457 uint64_t InsIdx = V.getConstantOperandVal(2);
25458 if (InsIdx * SmallVT.getScalarSizeInBits() ==
25459 ExtIdx * NVT.getScalarSizeInBits()) {
25460 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
25461 return SDValue();
25462
25463 return DAG.getBitcast(NVT, V.getOperand(1));
25464 }
25466 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
25467 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
25468 N->getOperand(1));
25469 }
25470
25471 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
25472 return NarrowBOp;
25473
25474 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25475 return SDValue(N, 0);
25476
25477 return SDValue();
25478}
25479
25480/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
25481/// followed by concatenation. Narrow vector ops may have better performance
25482/// than wide ops, and this can unlock further narrowing of other vector ops.
25483/// Targets can invert this transform later if it is not profitable.
25484 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
25485 SelectionDAG &DAG) {
25486 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
25487 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
25488 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
25489 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
25490 return SDValue();
25491
25492 // Split the wide shuffle mask into halves. Any mask element that is accessing
25493 // operand 1 is offset down to account for narrowing of the vectors.
25494 ArrayRef<int> Mask = Shuf->getMask();
25495 EVT VT = Shuf->getValueType(0);
25496 unsigned NumElts = VT.getVectorNumElements();
25497 unsigned HalfNumElts = NumElts / 2;
25498 SmallVector<int, 16> Mask0(HalfNumElts, -1);
25499 SmallVector<int, 16> Mask1(HalfNumElts, -1);
25500 for (unsigned i = 0; i != NumElts; ++i) {
25501 if (Mask[i] == -1)
25502 continue;
25503 // If we reference the upper (undef) subvector then the element is undef.
25504 if ((Mask[i] % NumElts) >= HalfNumElts)
25505 continue;
25506 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
25507 if (i < HalfNumElts)
25508 Mask0[i] = M;
25509 else
25510 Mask1[i - HalfNumElts] = M;
25511 }
25512
25513 // Ask the target if this is a valid transform.
25514 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25515 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
25516 HalfNumElts);
25517 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
25518 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
25519 return SDValue();
25520
25521 // shuffle (concat X, undef), (concat Y, undef), Mask -->
25522 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
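// For example, with v8i32 operands whose low v4i32 halves are X and Y:
//   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,2,10,3,11>
//     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)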
25523 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
25524 SDLoc DL(Shuf);
25525 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
25526 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
25527 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
25528}
25529
25530// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
25531 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
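// For example, with v4i32 concat operands A, B and C, D:
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11> --> concat B, C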
25532 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
25533 EVT VT = N->getValueType(0);
25534 unsigned NumElts = VT.getVectorNumElements();
25535
25536 SDValue N0 = N->getOperand(0);
25537 SDValue N1 = N->getOperand(1);
25538 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25539 ArrayRef<int> Mask = SVN->getMask();
25540
25541 SmallVector<SDValue, 4> Ops;
25542 EVT ConcatVT = N0.getOperand(0).getValueType();
25543 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
25544 unsigned NumConcats = NumElts / NumElemsPerConcat;
25545
25546 auto IsUndefMaskElt = [](int i) { return i == -1; };
25547
25548 // Special case: shuffle(concat(A,B)) can be more efficiently represented
25549 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25550 // half vector elements.
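// For example, with v4i16 A and B:
//   shuffle (concat A, B), undef, <4,5,6,7,u,u,u,u>
//     --> concat (shuffle A, B, <4,5,6,7>), undef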
25551 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25552 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25553 IsUndefMaskElt)) {
25554 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25555 N0.getOperand(1),
25556 Mask.slice(0, NumElemsPerConcat));
25557 N1 = DAG.getUNDEF(ConcatVT);
25558 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25559 }
25560
25561 // Look at every vector that's inserted. We're looking for exact
25562 // subvector-sized copies from a concatenated vector
25563 for (unsigned I = 0; I != NumConcats; ++I) {
25564 unsigned Begin = I * NumElemsPerConcat;
25565 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25566
25567 // Make sure we're dealing with a copy.
25568 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25569 Ops.push_back(DAG.getUNDEF(ConcatVT));
25570 continue;
25571 }
25572
25573 int OpIdx = -1;
25574 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25575 if (IsUndefMaskElt(SubMask[i]))
25576 continue;
25577 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25578 return SDValue();
25579 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25580 if (0 <= OpIdx && EltOpIdx != OpIdx)
25581 return SDValue();
25582 OpIdx = EltOpIdx;
25583 }
25584 assert(0 <= OpIdx && "Unknown concat_vectors op");
25585
25586 if (OpIdx < (int)N0.getNumOperands())
25587 Ops.push_back(N0.getOperand(OpIdx));
25588 else
25589 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25590 }
25591
25592 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25593}
25594
25595// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25596// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25597//
25598// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25599// a simplification in some sense, but it isn't appropriate in general: some
25600// BUILD_VECTORs are substantially cheaper than others. The general case
25601// of a BUILD_VECTOR requires inserting each element individually (or
25602// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25603// all constants is a single constant pool load. A BUILD_VECTOR where each
25604// element is identical is a splat. A BUILD_VECTOR where most of the operands
25605// are undef lowers to a small number of element insertions.
25606//
25607// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25608// We don't fold shuffles where one side is a non-zero constant, and we don't
25609// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25610// non-constant operands. This seems to work out reasonably well in practice.
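// For example (subject to the one-use and constant/duplicate heuristics above):
//   shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,4,1,5>
//     --> build_vector a, e, b, f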
25611 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25612 SelectionDAG &DAG,
25613 const TargetLowering &TLI) {
25614 EVT VT = SVN->getValueType(0);
25615 unsigned NumElts = VT.getVectorNumElements();
25616 SDValue N0 = SVN->getOperand(0);
25617 SDValue N1 = SVN->getOperand(1);
25618
25619 if (!N0->hasOneUse())
25620 return SDValue();
25621
25622 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25623 // discussed above.
25624 if (!N1.isUndef()) {
25625 if (!N1->hasOneUse())
25626 return SDValue();
25627
25628 bool N0AnyConst = isAnyConstantBuildVector(N0);
25629 bool N1AnyConst = isAnyConstantBuildVector(N1);
25630 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25631 return SDValue();
25632 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25633 return SDValue();
25634 }
25635
25636 // If both inputs are splats of the same value then we can safely merge this
25637 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25638 bool IsSplat = false;
25639 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25640 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25641 if (BV0 && BV1)
25642 if (SDValue Splat0 = BV0->getSplatValue())
25643 IsSplat = (Splat0 == BV1->getSplatValue());
25644
25645 SmallVector<SDValue, 8> Ops;
25646 SmallSet<SDValue, 16> DuplicateOps;
25647 for (int M : SVN->getMask()) {
25648 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25649 if (M >= 0) {
25650 int Idx = M < (int)NumElts ? M : M - NumElts;
25651 SDValue &S = (M < (int)NumElts ? N0 : N1);
25652 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25653 Op = S.getOperand(Idx);
25654 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25655 SDValue Op0 = S.getOperand(0);
25656 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25657 } else {
25658 // Operand can't be combined - bail out.
25659 return SDValue();
25660 }
25661 }
25662
25663 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25664 // generating a splat; semantically, this is fine, but it's likely to
25665 // generate low-quality code if the target can't reconstruct an appropriate
25666 // shuffle.
25667 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25668 if (!IsSplat && !DuplicateOps.insert(Op).second)
25669 return SDValue();
25670
25671 Ops.push_back(Op);
25672 }
25673
25674 // BUILD_VECTOR requires all inputs to be of the same type, find the
25675 // maximum type and extend them all.
25676 EVT SVT = VT.getScalarType();
25677 if (SVT.isInteger())
25678 for (SDValue &Op : Ops)
25679 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25680 if (SVT != VT.getScalarType())
25681 for (SDValue &Op : Ops)
25682 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25683 : (TLI.isZExtFree(Op.getValueType(), SVT)
25684 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25685 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25686 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25687}
25688
25689// Match shuffles that can be converted to *_vector_extend_in_reg.
25690// This is often generated during legalization.
25691// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25692// and returns the EVT to which the extension should be performed.
25693// NOTE: this assumes that the src is the first operand of the shuffle.
25694 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25695 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25696 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25697 bool LegalOperations) {
25698 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25699
25700 // TODO Add support for big-endian when we have a test case.
25701 if (!VT.isInteger() || IsBigEndian)
25702 return std::nullopt;
25703
25704 unsigned NumElts = VT.getVectorNumElements();
25705 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25706
25707 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25708 // power-of-2 extensions as they are the most likely.
25709 // FIXME: should try the Scale == NumElts case too.
25710 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25711 // The vector width must be a multiple of Scale.
25712 if (NumElts % Scale != 0)
25713 continue;
25714
25715 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25716 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25717
25718 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25719 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25720 continue;
25721
25722 if (Match(Scale))
25723 return OutVT;
25724 }
25725
25726 return std::nullopt;
25727}
25728
25729// Match shuffles that can be converted to any_vector_extend_in_reg.
25730// This is often generated during legalization.
25731// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25732 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25733 SelectionDAG &DAG,
25734 const TargetLowering &TLI,
25735 bool LegalOperations) {
25736 EVT VT = SVN->getValueType(0);
25737 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25738
25739 // TODO Add support for big-endian when we have a test case.
25740 if (!VT.isInteger() || IsBigEndian)
25741 return SDValue();
25742
25743 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25744 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25745 Mask = SVN->getMask()](unsigned Scale) {
25746 for (unsigned i = 0; i != NumElts; ++i) {
25747 if (Mask[i] < 0)
25748 continue;
25749 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25750 continue;
25751 return false;
25752 }
25753 return true;
25754 };
25755
25756 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25757 SDValue N0 = SVN->getOperand(0);
25758 // Never create an illegal type. Only create unsupported operations if we
25759 // are pre-legalization.
25760 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25761 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25762 if (!OutVT)
25763 return SDValue();
25764 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25765}
25766
25767// Match shuffles that can be converted to zero_extend_vector_inreg.
25768// This is often generated during legalization.
25769// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25770 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25771 SelectionDAG &DAG,
25772 const TargetLowering &TLI,
25773 bool LegalOperations) {
25774 bool LegalTypes = true;
25775 EVT VT = SVN->getValueType(0);
25776 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25777 unsigned NumElts = VT.getVectorNumElements();
25778 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25779
25780 // TODO: add support for big-endian when we have a test case.
25781 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25782 if (!VT.isInteger() || IsBigEndian)
25783 return SDValue();
25784
25785 SmallVector<int, 16> Mask(SVN->getMask());
25786 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25787 for (int &Indice : Mask) {
25788 if (Indice < 0)
25789 continue;
25790 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25791 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25792 Fn(Indice, OpIdx, OpEltIdx);
25793 }
25794 };
25795
25796 // Which elements of which operand does this shuffle demand?
25797 std::array<APInt, 2> OpsDemandedElts;
25798 for (APInt &OpDemandedElts : OpsDemandedElts)
25799 OpDemandedElts = APInt::getZero(NumElts);
25800 ForEachDecomposedIndice(
25801 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25802 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25803 });
25804
25805 // Element-wise(!), which of these demanded elements are known to be zero?
25806 std::array<APInt, 2> OpsKnownZeroElts;
25807 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25808 std::get<2>(I) =
25809 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25810
25811 // Manifest zeroable element knowledge in the shuffle mask.
25812 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25813 // this is a local invention, but it won't leak into DAG.
25814 // FIXME: should we not manifest them, but just check when matching?
25815 bool HadZeroableElts = false;
25816 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25817 int &Indice, int OpIdx, int OpEltIdx) {
25818 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25819 Indice = -2; // Zeroable element.
25820 HadZeroableElts = true;
25821 }
25822 });
25823
25824 // Don't proceed unless we've refined at least one zeroable mask index.
25825 // If we didn't, then we are still trying to match the same shuffle mask
25826 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25827 // and evidently failed. Proceeding will lead to endless combine loops.
25828 if (!HadZeroableElts)
25829 return SDValue();
25830
25831 // The shuffle may be more fine-grained than we want. Widen elements first.
25832 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25833 SmallVector<int, 16> ScaledMask;
25834 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25835 assert(Mask.size() >= ScaledMask.size() &&
25836 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25837 int Prescale = Mask.size() / ScaledMask.size();
25838
25839 NumElts = ScaledMask.size();
25840 EltSizeInBits *= Prescale;
25841
25842 EVT PrescaledVT = EVT::getVectorVT(
25843 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25844 NumElts);
25845
25846 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25847 return SDValue();
25848
25849 // For example,
25850 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25851 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25852 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25853 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25854 "Unexpected mask scaling factor.");
25855 ArrayRef<int> Mask = ScaledMask;
25856 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25857 SrcElt != NumSrcElts; ++SrcElt) {
25858 // Analyze the shuffle mask in Scale-sized chunks.
25859 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25860 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25861 Mask = Mask.drop_front(MaskChunk.size());
25862 // The first index in this chunk must be SrcElt, but not zero!
25863 // FIXME: undef should be fine, but that results in a more-defined result.
25864 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25865 return false;
25866 // The rest of the indices in this chunk must be zeros.
25867 // FIXME: undef should be fine, but that results in a more-defined result.
25868 if (!all_of(MaskChunk.drop_front(1),
25869 [](int Indice) { return Indice == -2; }))
25870 return false;
25871 }
25872 assert(Mask.empty() && "Did not process the whole mask?");
25873 return true;
25874 };
25875
25876 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25877 for (bool Commuted : {false, true}) {
25878 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25879 if (Commuted)
25880 ShuffleVectorSDNode::commuteMask(ScaledMask);
25881 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25882 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25883 LegalOperations);
25884 if (OutVT)
25885 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25886 DAG.getBitcast(PrescaledVT, Op)));
25887 }
25888 return SDValue();
25889}
25890
25891// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25892// each source element of a large type into the lowest elements of a smaller
25893// destination type. This is often generated during legalization.
25894// If the source node itself was a '*_extend_vector_inreg' node then we should
25895// then be able to remove it.
25896 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25897 SelectionDAG &DAG) {
25898 EVT VT = SVN->getValueType(0);
25899 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25900
25901 // TODO Add support for big-endian when we have a test case.
25902 if (!VT.isInteger() || IsBigEndian)
25903 return SDValue();
25904
25905 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25906
25907 unsigned Opcode = N0.getOpcode();
25908 if (!ISD::isExtVecInRegOpcode(Opcode))
25909 return SDValue();
25910
25911 SDValue N00 = N0.getOperand(0);
25912 ArrayRef<int> Mask = SVN->getMask();
25913 unsigned NumElts = VT.getVectorNumElements();
25914 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25915 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25916 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25917
25918 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25919 return SDValue();
25920 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25921
25922 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25923 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25924 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25925 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25926 for (unsigned i = 0; i != NumElts; ++i) {
25927 if (Mask[i] < 0)
25928 continue;
25929 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25930 continue;
25931 return false;
25932 }
25933 return true;
25934 };
25935
25936 // At the moment we just handle the case where we've truncated back to the
25937 // same size as before the extension.
25938 // TODO: handle more extension/truncation cases as cases arise.
25939 if (EltSizeInBits != ExtSrcSizeInBits)
25940 return SDValue();
25941
25942 // We can remove *extend_vector_inreg only if the truncation happens at
25943 // the same scale as the extension.
25944 if (isTruncate(ExtScale))
25945 return DAG.getBitcast(VT, N00);
25946
25947 return SDValue();
25948}
25949
25950// Combine shuffles of splat-shuffles of the form:
25951// shuffle (shuffle V, undef, splat-mask), undef, M
25952// If splat-mask contains undef elements, we need to be careful about
25953 // introducing undefs in the folded mask which are not the result of composing
25954// the masks of the shuffles.
25955 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25956 SelectionDAG &DAG) {
25957 EVT VT = Shuf->getValueType(0);
25958 unsigned NumElts = VT.getVectorNumElements();
25959
25960 if (!Shuf->getOperand(1).isUndef())
25961 return SDValue();
25962
25963 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25964 // in disguise, with all demanded elements being identical.
25965 // FIXME: this can be done per-operand.
25966 if (!Shuf->isSplat()) {
25967 APInt DemandedElts(NumElts, 0);
25968 for (int Idx : Shuf->getMask()) {
25969 if (Idx < 0)
25970 continue; // Ignore sentinel indices.
25971 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25972 DemandedElts.setBit(Idx);
25973 }
25974 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25975 APInt UndefElts;
25976 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25977 // Even if all demanded elements are splat, some of them could be undef.
25978 // Which lowest demanded element is *not* known-undef?
25979 std::optional<unsigned> MinNonUndefIdx;
25980 for (int Idx : Shuf->getMask()) {
25981 if (Idx < 0 || UndefElts[Idx])
25982 continue; // Ignore sentinel indices, and undef elements.
25983 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25984 }
25985 if (!MinNonUndefIdx)
25986 return DAG.getUNDEF(VT); // All undef - result is undef.
25987 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25988 SmallVector<int, 8> SplatMask(Shuf->getMask());
25989 for (int &Idx : SplatMask) {
25990 if (Idx < 0)
25991 continue; // Passthrough sentinel indices.
25992 // Otherwise, just pick the lowest demanded non-undef element.
25993 // Or sentinel undef, if we know we'd pick a known-undef element.
25994 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25995 }
25996 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25997 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25998 Shuf->getOperand(1), SplatMask);
25999 }
26000 }
26001
26002 // If the inner operand is a known splat with no undefs, just return that directly.
26003 // TODO: Create DemandedElts mask from Shuf's mask.
26004 // TODO: Allow undef elements and merge with the shuffle code below.
26005 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26006 return Shuf->getOperand(0);
26007
26008 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26009 if (!Splat || !Splat->isSplat())
26010 return SDValue();
26011
26012 ArrayRef<int> ShufMask = Shuf->getMask();
26013 ArrayRef<int> SplatMask = Splat->getMask();
26014 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26015
26016 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26017 // every undef mask element in the splat-shuffle has a corresponding undef
26018 // element in the user-shuffle's mask or if the composition of mask elements
26019 // would result in undef.
26020 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26021 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26022 // In this case it is not legal to simplify to the splat-shuffle because we
26023 // may be exposing to the users of the shuffle an undef element at index 1
26024 // which was not there before the combine.
26025 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26026 // In this case the composition of masks yields SplatMask, so it's ok to
26027 // simplify to the splat-shuffle.
26028 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26029 // In this case the composed mask includes all undef elements of SplatMask
26030 // and in addition sets element zero to undef. It is safe to simplify to
26031 // the splat-shuffle.
26032 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26033 ArrayRef<int> SplatMask) {
26034 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26035 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26036 SplatMask[UserMask[i]] != -1)
26037 return false;
26038 return true;
26039 };
26040 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26041 return Shuf->getOperand(0);
26042
26043 // Create a new shuffle with a mask that is composed of the two shuffles'
26044 // masks.
26045 SmallVector<int, 32> NewMask;
26046 for (int Idx : ShufMask)
26047 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26048
26049 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26050 Splat->getOperand(0), Splat->getOperand(1),
26051 NewMask);
26052}
26053
26054// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26055// the mask can be treated as a larger type.
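// For example, assuming the widened mask is legal for the target:
//   shuffle (v4i32 bitcast (v2i64 X)), (v4i32 bitcast (v2i64 Y)), <0,1,6,7>
//     --> v4i32 bitcast (shuffle X, Y, <0,3>)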
26056 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26057 SelectionDAG &DAG,
26058 const TargetLowering &TLI,
26059 bool LegalOperations) {
26060 SDValue Op0 = SVN->getOperand(0);
26061 SDValue Op1 = SVN->getOperand(1);
26062 EVT VT = SVN->getValueType(0);
26063 if (Op0.getOpcode() != ISD::BITCAST)
26064 return SDValue();
26065 EVT InVT = Op0.getOperand(0).getValueType();
26066 if (!InVT.isVector() ||
26067 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26068 Op1.getOperand(0).getValueType() != InVT)))
26069 return SDValue();
26070 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26071 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26072 return SDValue();
26073
26074 int VTLanes = VT.getVectorNumElements();
26075 int InLanes = InVT.getVectorNumElements();
26076 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26077 (LegalOperations &&
26078 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
26079 return SDValue();
26080 int Factor = VTLanes / InLanes;
26081
26082 // Check that each group of lanes in the mask is either undef or makes a valid
26083 // mask for the wider lane type.
26084 ArrayRef<int> Mask = SVN->getMask();
26085 SmallVector<int> NewMask;
26086 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26087 return SDValue();
26088
26089 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26090 return SDValue();
26091
26092 // Create the new shuffle with the new mask and bitcast it back to the
26093 // original type.
26094 SDLoc DL(SVN);
26095 Op0 = Op0.getOperand(0);
26096 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26097 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26098 return DAG.getBitcast(VT, NewShuf);
26099}
26100
26101/// Combine shuffle of shuffle of the form:
26102/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
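/// For example:
///   shuf (shuf X, undef, <1,u,1,1>), undef, <0,2,3,u> --> shuf X, undef, <1,1,1,u>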
26103 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26104 SelectionDAG &DAG) {
26105 if (!OuterShuf->getOperand(1).isUndef())
26106 return SDValue();
26107 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26108 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26109 return SDValue();
26110
26111 ArrayRef<int> OuterMask = OuterShuf->getMask();
26112 ArrayRef<int> InnerMask = InnerShuf->getMask();
26113 unsigned NumElts = OuterMask.size();
26114 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26115 SmallVector<int, 32> CombinedMask(NumElts, -1);
26116 int SplatIndex = -1;
26117 for (unsigned i = 0; i != NumElts; ++i) {
26118 // Undef lanes remain undef.
26119 int OuterMaskElt = OuterMask[i];
26120 if (OuterMaskElt == -1)
26121 continue;
26122
26123 // Peek through the shuffle masks to get the underlying source element.
26124 int InnerMaskElt = InnerMask[OuterMaskElt];
26125 if (InnerMaskElt == -1)
26126 continue;
26127
26128 // Initialize the splatted element.
26129 if (SplatIndex == -1)
26130 SplatIndex = InnerMaskElt;
26131
26132 // Non-matching index - this is not a splat.
26133 if (SplatIndex != InnerMaskElt)
26134 return SDValue();
26135
26136 CombinedMask[i] = InnerMaskElt;
26137 }
26138 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26139 getSplatIndex(CombinedMask) != -1) &&
26140 "Expected a splat mask");
26141
26142 // TODO: The transform may be a win even if the mask is not legal.
26143 EVT VT = OuterShuf->getValueType(0);
26144 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26145 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26146 return SDValue();
26147
26148 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26149 InnerShuf->getOperand(1), CombinedMask);
26150}
26151
26152/// If the shuffle mask is taking exactly one element from the first vector
26153/// operand and passing through all other elements from the second vector
26154/// operand, return the index of the mask element that is choosing an element
26155/// from the first operand. Otherwise, return -1.
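/// For example, with 4 elements, mask <4,5,0,7> returns 2: only element 2 is
/// taken from operand 0 (its element 0); every other element passes through
/// operand 1 unchanged.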
26156 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26157 int MaskSize = Mask.size();
26158 int EltFromOp0 = -1;
26159 // TODO: This does not match if there are undef elements in the shuffle mask.
26160 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26161 // removing an instruction (a shuffle), but losing the knowledge that some
26162 // vector lanes are not needed.
26163 for (int i = 0; i != MaskSize; ++i) {
26164 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26165 // We're looking for a shuffle of exactly one element from operand 0.
26166 if (EltFromOp0 != -1)
26167 return -1;
26168 EltFromOp0 = i;
26169 } else if (Mask[i] != i + MaskSize) {
26170 // Nothing from operand 1 can change lanes.
26171 return -1;
26172 }
26173 }
26174 return EltFromOp0;
26175}
26176
26177/// If a shuffle inserts exactly one element from a source vector operand into
26178/// another vector operand and we can access the specified element as a scalar,
26179/// then we can eliminate the shuffle.
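/// For example:
///   shuffle (insert_vector_elt v1, x, 0), v2, <4,0,6,7>
///     --> insert_vector_elt v2, x, 1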
26180 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
26181 SelectionDAG &DAG) {
26182 // First, check if we are taking one element of a vector and shuffling that
26183 // element into another vector.
26184 ArrayRef<int> Mask = Shuf->getMask();
26185 SmallVector<int, 16> CommutedMask(Mask);
26186 SDValue Op0 = Shuf->getOperand(0);
26187 SDValue Op1 = Shuf->getOperand(1);
26188 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26189 if (ShufOp0Index == -1) {
26190 // Commute mask and check again.
26191 ShuffleVectorSDNode::commuteMask(CommutedMask);
26192 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26193 if (ShufOp0Index == -1)
26194 return SDValue();
26195 // Commute operands to match the commuted shuffle mask.
26196 std::swap(Op0, Op1);
26197 Mask = CommutedMask;
26198 }
26199
26200 // The shuffle inserts exactly one element from operand 0 into operand 1.
26201 // Now see if we can access that element as a scalar via a real insert element
26202 // instruction.
26203 // TODO: We can try harder to locate the element as a scalar. Examples: it
26204 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
26205 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26206 "Shuffle mask value must be from operand 0");
26207
26208 SDValue Elt;
26209 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26210 m_SpecificInt(Mask[ShufOp0Index])))) {
26211 // There's an existing insertelement with constant insertion index, so we
26212 // don't need to check the legality/profitability of a replacement operation
26213 // that differs at most in the constant value. The target should be able to
26214 // lower any of those in a similar way. If not, legalization will expand
26215 // this to a scalar-to-vector plus shuffle.
26216 //
26217 // Note that the shuffle may move the scalar from the position that the
26218 // insert element used. Therefore, our new insert element occurs at the
26219 // shuffle's mask index value, not the insert's index value.
26220 //
26221 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26222 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26223 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26224 Op1, Elt, NewInsIndex);
26225 }
26226
26227 return SDValue();
26228}
26229
26230/// If we have a unary shuffle of a shuffle, see if it can be folded away
26231/// completely. This has the potential to lose undef knowledge because the first
26232/// shuffle may not have an undef mask element where the second one does. So
26233/// only call this after doing simplifications based on demanded elements.
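/// For example:
///   shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2> --> shuf0 X, Y, <0,0,2,2>
/// since every element the outer mask selects is identical to the element the
/// inner shuffle already produces in that lane.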
26234 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
26235 // shuf (shuf0 X, Y, Mask0), undef, Mask
26236 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26237 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
26238 return SDValue();
26239
26240 ArrayRef<int> Mask = Shuf->getMask();
26241 ArrayRef<int> Mask0 = Shuf0->getMask();
26242 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
26243 // Ignore undef elements.
26244 if (Mask[i] == -1)
26245 continue;
26246 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
26247
26248 // Is the element of the shuffle operand chosen by this shuffle the same as
26249 // the element chosen by the shuffle operand itself?
26250 if (Mask0[Mask[i]] != Mask0[i])
26251 return SDValue();
26252 }
26253 // Every element of this shuffle is identical to the result of the previous
26254 // shuffle, so we can replace this value.
26255 return Shuf->getOperand(0);
26256}
26257
26258SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
26259 EVT VT = N->getValueType(0);
26260 unsigned NumElts = VT.getVectorNumElements();
26261
26262 SDValue N0 = N->getOperand(0);
26263 SDValue N1 = N->getOperand(1);
26264
26265 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
26266
26267 // Canonicalize shuffle undef, undef -> undef
26268 if (N0.isUndef() && N1.isUndef())
26269 return DAG.getUNDEF(VT);
26270
26271 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26272
26273 // Canonicalize shuffle v, v -> v, undef
26274 if (N0 == N1)
26275 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
26276 createUnaryMask(SVN->getMask(), NumElts));
26277
26278 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
26279 if (N0.isUndef())
26280 return DAG.getCommutedVectorShuffle(*SVN);
26281
26282 // Remove references to rhs if it is undef
26283 if (N1.isUndef()) {
26284 bool Changed = false;
26285 SmallVector<int, 8> NewMask;
26286 for (unsigned i = 0; i != NumElts; ++i) {
26287 int Idx = SVN->getMaskElt(i);
26288 if (Idx >= (int)NumElts) {
26289 Idx = -1;
26290 Changed = true;
26291 }
26292 NewMask.push_back(Idx);
26293 }
26294 if (Changed)
26295 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
26296 }
26297
26298 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
26299 return InsElt;
26300
26301 // A shuffle of a single vector that is a splatted value can always be folded.
26302 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
26303 return V;
26304
26305 if (SDValue V = formSplatFromShuffles(SVN, DAG))
26306 return V;
26307
26308 // If it is a splat, check if the argument vector is another splat or a
26309 // build_vector.
26310 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
26311 int SplatIndex = SVN->getSplatIndex();
26312 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
26313 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
26314 // splat (vector_bo L, R), Index -->
26315 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
26316 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
26317 SDLoc DL(N);
26318 EVT EltVT = VT.getScalarType();
26319 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
26320 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
26321 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
26322 SDValue NewBO =
26323 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
26324 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
26325 SmallVector<int, 16> ZeroMask(NumElts, 0);
26326 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
26327 }
26328
26329 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
26330 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
26331 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
26332 N0.hasOneUse()) {
26333 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
26334 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
26335
26336 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
26337 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
26338 if (Idx->getAPIntValue() == SplatIndex)
26339 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
26340
26341 // Look through a bitcast if LE and splatting lane 0, through to a
26342 // scalar_to_vector or a build_vector.
26343 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
26344 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
26345 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
26346 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
26347 EVT N00VT = N0.getOperand(0).getValueType();
26348 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
26349 VT.isInteger() && N00VT.isInteger()) {
26350 EVT InVT =
26351 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
26352 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
26353 SDLoc(N), InVT);
26354 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
26355 }
26356 }
26357 }
26358
26359 // If this is a bit convert that changes the element type of the vector but
26360 // not the number of vector elements, look through it. Be careful not to
26361 // look through conversions that change things like v4f32 to v2f64.
26362 SDNode *V = N0.getNode();
26363 if (V->getOpcode() == ISD::BITCAST) {
26364 SDValue ConvInput = V->getOperand(0);
26365 if (ConvInput.getValueType().isVector() &&
26366 ConvInput.getValueType().getVectorNumElements() == NumElts)
26367 V = ConvInput.getNode();
26368 }
26369
26370 if (V->getOpcode() == ISD::BUILD_VECTOR) {
26371 assert(V->getNumOperands() == NumElts &&
26372 "BUILD_VECTOR has wrong number of operands");
26373 SDValue Base;
26374 bool AllSame = true;
26375 for (unsigned i = 0; i != NumElts; ++i) {
26376 if (!V->getOperand(i).isUndef()) {
26377 Base = V->getOperand(i);
26378 break;
26379 }
26380 }
26381 // Splat of <u, u, u, u>, return <u, u, u, u>
26382 if (!Base.getNode())
26383 return N0;
26384 for (unsigned i = 0; i != NumElts; ++i) {
26385 if (V->getOperand(i) != Base) {
26386 AllSame = false;
26387 break;
26388 }
26389 }
26390 // Splat of <x, x, x, x>, return <x, x, x, x>
26391 if (AllSame)
26392 return N0;
26393
26394 // Canonicalize any other splat as a build_vector, but avoid defining any
26395 // undefined elements in the mask.
26396 SDValue Splatted = V->getOperand(SplatIndex);
26397 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
26398 EVT EltVT = Splatted.getValueType();
26399
26400 for (unsigned i = 0; i != NumElts; ++i) {
26401 if (SVN->getMaskElt(i) < 0)
26402 Ops[i] = DAG.getUNDEF(EltVT);
26403 }
26404
26405 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
26406
26407 // We may have jumped through bitcasts, so the type of the
26408 // BUILD_VECTOR may not match the type of the shuffle.
26409 if (V->getValueType(0) != VT)
26410 NewBV = DAG.getBitcast(VT, NewBV);
26411 return NewBV;
26412 }
26413 }
26414
26415 // Simplify source operands based on shuffle mask.
26416 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26417 return SDValue(N, 0);
26418
26419 // This is intentionally placed after demanded elements simplification because
26420 // it could eliminate knowledge of undef elements created by this shuffle.
26421 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
26422 return ShufOp;
26423
26424 // Match shuffles that can be converted to any_vector_extend_in_reg.
26425 if (SDValue V =
26426 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
26427 return V;
26428
26429 // Combine "truncate_vector_in_reg" style shuffles.
26430 if (SDValue V = combineTruncationShuffle(SVN, DAG))
26431 return V;
26432
26433 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
26434 Level < AfterLegalizeVectorOps &&
26435 (N1.isUndef() ||
26436 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
26437 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
26438 if (SDValue V = partitionShuffleOfConcats(N, DAG))
26439 return V;
26440 }
26441
26442 // A shuffle of a concat of the same narrow vector can be reduced to use
26443 // only low-half elements of a concat with undef:
26444 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
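// For example, with v2i32 X (v4i32 concat), if the new mask is legal:
//   shuf (concat X, X), undef, <3,1,2,0> --> shuf (concat X, undef), undef, <1,1,0,0>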
26445 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
26446 N0.getNumOperands() == 2 &&
26447 N0.getOperand(0) == N0.getOperand(1)) {
26448 int HalfNumElts = (int)NumElts / 2;
26449 SmallVector<int, 8> NewMask;
26450 for (unsigned i = 0; i != NumElts; ++i) {
26451 int Idx = SVN->getMaskElt(i);
26452 if (Idx >= HalfNumElts) {
26453 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
26454 Idx -= HalfNumElts;
26455 }
26456 NewMask.push_back(Idx);
26457 }
26458 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
26459 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
26460 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
26461 N0.getOperand(0), UndefVec);
26462 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
26463 }
26464 }
26465
26466 // See if we can replace a shuffle with an insert_subvector.
26467 // e.g. v2i32 into v8i32:
26468 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
26469 // --> insert_subvector(lhs,rhs1,4).
26470 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
26472 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
26473 // Ensure RHS subvectors are legal.
26474 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
26475 EVT SubVT = RHS.getOperand(0).getValueType();
26476 int NumSubVecs = RHS.getNumOperands();
26477 int NumSubElts = SubVT.getVectorNumElements();
26478 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
26479 if (!TLI.isTypeLegal(SubVT))
26480 return SDValue();
26481
26482 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
26483 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
26484 return SDValue();
26485
26486 // Search [NumSubElts] spans for RHS sequence.
26487 // TODO: Can we avoid nested loops to increase performance?
26488 SmallVector<int> InsertionMask(NumElts);
26489 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
26490 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
26491 // Reset mask to identity.
26492 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
26493
26494 // Add subvector insertion.
26495 std::iota(InsertionMask.begin() + SubIdx,
26496 InsertionMask.begin() + SubIdx + NumSubElts,
26497 NumElts + (SubVec * NumSubElts));
26498
26499 // See if the shuffle mask matches the reference insertion mask.
26500 bool MatchingShuffle = true;
26501 for (int i = 0; i != (int)NumElts; ++i) {
26502 int ExpectIdx = InsertionMask[i];
26503 int ActualIdx = Mask[i];
26504 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
26505 MatchingShuffle = false;
26506 break;
26507 }
26508 }
26509
26510 if (MatchingShuffle)
26511 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
26512 RHS.getOperand(SubVec),
26513 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
26514 }
26515 }
26516 return SDValue();
26517 };
26518 ArrayRef<int> Mask = SVN->getMask();
26519 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
26520 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
26521 return InsertN1;
26522 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
26523 SmallVector<int> CommuteMask(Mask);
26524 ShuffleVectorSDNode::commuteMask(CommuteMask);
26525 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
26526 return InsertN0;
26527 }
26528 }
26529
26530 // If we're not performing a select/blend shuffle, see if we can convert the
26531 // shuffle into an AND node if all the out-of-lane elements are known zero.
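// For example, if elements 0 and 2 of Y are known to be zero (v4i32):
//   shuffle X, Y, <0,4,6,3> --> and X, <-1,0,0,-1>
// (or a legal "clear" shuffle with a zero vector, when the target prefers it).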
26532 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26533 bool IsInLaneMask = true;
26534 ArrayRef<int> Mask = SVN->getMask();
26535 SmallVector<int, 16> ClearMask(NumElts, -1);
26536 APInt DemandedLHS = APInt::getZero(NumElts);
26537 APInt DemandedRHS = APInt::getZero(NumElts);
26538 for (int I = 0; I != (int)NumElts; ++I) {
26539 int M = Mask[I];
26540 if (M < 0)
26541 continue;
26542 ClearMask[I] = M == I ? I : (I + NumElts);
26543 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
26544 if (M != I) {
26545 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
26546 Demanded.setBit(M % NumElts);
26547 }
26548 }
26549 // TODO: Should we try to mask with N1 as well?
26550 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
26551 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
26552 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
26553 SDLoc DL(N);
26554 EVT IntVT = VT.changeVectorElementTypeToInteger();
26555 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26556 // Transform the type to a legal type so that the buildvector constant
26557 // elements are not illegal. Make sure that the result is larger than the
26558 // original type, in case the value is split into two (e.g. i64->i32).
26559 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26560 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26561 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26562 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26563 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26564 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26565 for (int I = 0; I != (int)NumElts; ++I)
26566 if (0 <= Mask[I])
26567 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26568
26569 // See if a clear mask is legal instead of going via
26570 // XformToShuffleWithZero which loses UNDEF mask elements.
26571 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26572 return DAG.getBitcast(
26573 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26574 DAG.getConstant(0, DL, IntVT), ClearMask));
26575
26576 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26577 return DAG.getBitcast(
26578 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26579 DAG.getBuildVector(IntVT, DL, AndMask)));
26580 }
26581 }
26582 }
26583
26584 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26585 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26586 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26587 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26588 return Res;
26589
26590 // If this shuffle only has a single input that is a bitcasted shuffle,
26591 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26592 // back to their original types.
26593 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26594 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26595 TLI.isTypeLegal(VT)) {
26596
26597 SDValue BC0 = peekThroughOneUseBitcasts(N0);
26598 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26599 EVT SVT = VT.getScalarType();
26600 EVT InnerVT = BC0->getValueType(0);
26601 EVT InnerSVT = InnerVT.getScalarType();
26602
26603 // Determine which shuffle works with the smaller scalar type.
26604 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26605 EVT ScaleSVT = ScaleVT.getScalarType();
26606
26607 if (TLI.isTypeLegal(ScaleVT) &&
26608 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26609 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26610 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26611 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26612
26613 // Scale the shuffle masks to the smaller scalar type.
26614 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26615 SmallVector<int, 8> InnerMask;
26616 SmallVector<int, 8> OuterMask;
26617 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26618 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26619
26620 // Merge the shuffle masks.
26621 SmallVector<int, 8> NewMask;
26622 for (int M : OuterMask)
26623 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26624
26625 // Test for shuffle mask legality over both commutations.
26626 SDValue SV0 = BC0->getOperand(0);
26627 SDValue SV1 = BC0->getOperand(1);
26628 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26629 if (!LegalMask) {
26630 std::swap(SV0, SV1);
26631 ShuffleVectorSDNode::commuteMask(NewMask);
26632 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26633 }
26634
26635 if (LegalMask) {
26636 SV0 = DAG.getBitcast(ScaleVT, SV0);
26637 SV1 = DAG.getBitcast(ScaleVT, SV1);
26638 return DAG.getBitcast(
26639 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26640 }
26641 }
26642 }
26643 }
26644
26645 // Match shuffles of bitcasts, so long as the mask can be treated as the
26646 // larger type.
26647 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26648 return V;
26649
26650 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26651 // operand, and SV1 as the second operand.
26652 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26653 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26654 auto MergeInnerShuffle =
26655 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26656 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26657 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26658 SmallVectorImpl<int> &Mask) -> bool {
26659 // Don't try to fold splats; they're likely to simplify somehow, or they
26660 // might be free.
26661 if (OtherSVN->isSplat())
26662 return false;
26663
26664 SV0 = SV1 = SDValue();
26665 Mask.clear();
26666
26667 for (unsigned i = 0; i != NumElts; ++i) {
26668 int Idx = SVN->getMaskElt(i);
26669 if (Idx < 0) {
26670 // Propagate Undef.
26671 Mask.push_back(Idx);
26672 continue;
26673 }
26674
26675 if (Commute)
26676 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26677
26678 SDValue CurrentVec;
26679 if (Idx < (int)NumElts) {
26680 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26681 // shuffle mask to identify which vector is actually referenced.
26682 Idx = OtherSVN->getMaskElt(Idx);
26683 if (Idx < 0) {
26684 // Propagate Undef.
26685 Mask.push_back(Idx);
26686 continue;
26687 }
26688 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26689 : OtherSVN->getOperand(1);
26690 } else {
26691 // This shuffle index references an element within N1.
26692 CurrentVec = N1;
26693 }
26694
26695 // Simple case where 'CurrentVec' is UNDEF.
26696 if (CurrentVec.isUndef()) {
26697 Mask.push_back(-1);
26698 continue;
26699 }
26700
26701 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26702 // will be the first or second operand of the combined shuffle.
26703 Idx = Idx % NumElts;
26704 if (!SV0.getNode() || SV0 == CurrentVec) {
26705 // Ok. CurrentVec is the left hand side.
26706 // Update the mask accordingly.
26707 SV0 = CurrentVec;
26708 Mask.push_back(Idx);
26709 continue;
26710 }
26711 if (!SV1.getNode() || SV1 == CurrentVec) {
26712 // Ok. CurrentVec is the right hand side.
26713 // Update the mask accordingly.
26714 SV1 = CurrentVec;
26715 Mask.push_back(Idx + NumElts);
26716 continue;
26717 }
26718
26719 // Last chance - see if the vector is another shuffle and if it
26720 // uses one of the existing candidate shuffle ops.
26721 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26722 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26723 if (InnerIdx < 0) {
26724 Mask.push_back(-1);
26725 continue;
26726 }
26727 SDValue InnerVec = (InnerIdx < (int)NumElts)
26728 ? CurrentSVN->getOperand(0)
26729 : CurrentSVN->getOperand(1);
26730 if (InnerVec.isUndef()) {
26731 Mask.push_back(-1);
26732 continue;
26733 }
26734 InnerIdx %= NumElts;
26735 if (InnerVec == SV0) {
26736 Mask.push_back(InnerIdx);
26737 continue;
26738 }
26739 if (InnerVec == SV1) {
26740 Mask.push_back(InnerIdx + NumElts);
26741 continue;
26742 }
26743 }
26744
26745 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26746 return false;
26747 }
26748
26749 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26750 return true;
26751
26752 // Avoid introducing shuffles with illegal mask.
26753 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26754 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26755 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26756 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26757 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26758 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26759 if (TLI.isShuffleMaskLegal(Mask, VT))
26760 return true;
26761
26762 std::swap(SV0, SV1);
26763 ShuffleVectorSDNode::commuteMask(Mask);
26764 return TLI.isShuffleMaskLegal(Mask, VT);
26765 };
26766
26767 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26768 // Canonicalize shuffles according to rules:
26769 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26770 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26771 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26772 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26773 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26774 // The incoming shuffle must be of the same type as the result of the
26775 // current shuffle.
26776 assert(N1->getOperand(0).getValueType() == VT &&
26777 "Shuffle types don't match");
26778
26779 SDValue SV0 = N1->getOperand(0);
26780 SDValue SV1 = N1->getOperand(1);
26781 bool HasSameOp0 = N0 == SV0;
26782 bool IsSV1Undef = SV1.isUndef();
26783 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26784 // Commute the operands of this shuffle so merging below will trigger.
26785 return DAG.getCommutedVectorShuffle(*SVN);
26786 }
26787
26788 // Canonicalize splat shuffles to the RHS to improve merging below.
26789 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26790 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26791 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26792 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26793 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26794 return DAG.getCommutedVectorShuffle(*SVN);
26795 }
26796
26797 // Try to fold according to rules:
26798 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26799 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26800 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26801 // Don't try to fold shuffles with illegal type.
26802 // Only fold if this shuffle is the only user of the other shuffle.
26803 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26804 for (int i = 0; i != 2; ++i) {
26805 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26806 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26807 // The incoming shuffle must be of the same type as the result of the
26808 // current shuffle.
26809 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26810 assert(OtherSV->getOperand(0).getValueType() == VT &&
26811 "Shuffle types don't match");
26812
26813 SDValue SV0, SV1;
26814 SmallVector<int, 4> Mask;
26815 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26816 SV0, SV1, Mask)) {
26817 // Check if all indices in Mask are Undef. If so, propagate Undef.
26818 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26819 return DAG.getUNDEF(VT);
26820
26821 return DAG.getVectorShuffle(VT, SDLoc(N),
26822 SV0 ? SV0 : DAG.getUNDEF(VT),
26823 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26824 }
26825 }
26826 }
26827
26828 // Merge shuffles through binops if we are able to merge them with at least
26829 // one other shuffle.
26830 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26831 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26832 unsigned SrcOpcode = N0.getOpcode();
26833 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26834 (N1.isUndef() ||
26835 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26836 // Get binop source ops, or just pass on the undef.
26837 SDValue Op00 = N0.getOperand(0);
26838 SDValue Op01 = N0.getOperand(1);
26839 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26840 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26841 // TODO: We might be able to relax the VT check but we don't currently
26842 // have any isBinOp() that has different result/ops VTs so play safe until
26843 // we have test coverage.
26844 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26845 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26846 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26847 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26848 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26849 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26850 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26851 SmallVectorImpl<int> &Mask, bool LeftOp,
26852 bool Commute) {
26853 SDValue InnerN = Commute ? N1 : N0;
26854 SDValue Op0 = LeftOp ? Op00 : Op01;
26855 SDValue Op1 = LeftOp ? Op10 : Op11;
26856 if (Commute)
26857 std::swap(Op0, Op1);
26858 // Only accept the merged shuffle if we don't introduce undef elements,
26859 // or the inner shuffle already contained undef elements.
26860 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26861 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26862 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26863 Mask) &&
26864 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26865 llvm::none_of(Mask, [](int M) { return M < 0; }));
26866 };
26867
26868 // Ensure we don't increase the number of shuffles - we must merge a
26869 // shuffle from at least one of the LHS and RHS ops.
26870 bool MergedLeft = false;
26871 SDValue LeftSV0, LeftSV1;
26872 SmallVector<int, 4> LeftMask;
26873 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26874 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26875 MergedLeft = true;
26876 } else {
26877 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26878 LeftSV0 = Op00, LeftSV1 = Op10;
26879 }
26880
26881 bool MergedRight = false;
26882 SDValue RightSV0, RightSV1;
26883 SmallVector<int, 4> RightMask;
26884 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26885 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26886 MergedRight = true;
26887 } else {
26888 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26889 RightSV0 = Op01, RightSV1 = Op11;
26890 }
26891
26892 if (MergedLeft || MergedRight) {
26893 SDLoc DL(N);
26894 SDValue LHS = DAG.getVectorShuffle(
26895 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26896 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26897 SDValue RHS = DAG.getVectorShuffle(
26898 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26899 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26900 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26901 }
26902 }
26903 }
26904 }
26905
26906 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26907 return V;
26908
26909 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26910 // Perform this really late, because it could eliminate knowledge
26911 // of undef elements created by this shuffle.
26912 if (Level < AfterLegalizeTypes)
26913 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26914 LegalOperations))
26915 return V;
26916
26917 return SDValue();
26918}
26919
26920SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26921 EVT VT = N->getValueType(0);
26922 if (!VT.isFixedLengthVector())
26923 return SDValue();
26924
26925 // Try to convert a scalar binop with an extracted vector element to a vector
26926 // binop. This is intended to reduce potentially expensive register moves.
26927 // TODO: Check if both operands are extracted.
26928 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26929 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26930 SDValue Scalar = N->getOperand(0);
26931 unsigned Opcode = Scalar.getOpcode();
26932 EVT VecEltVT = VT.getScalarType();
26933 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26934 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26935 Scalar.getOperand(0).getValueType() == VecEltVT &&
26936 Scalar.getOperand(1).getValueType() == VecEltVT &&
26937 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26938 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26939 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26940 // Match an extract element and get a shuffle mask equivalent.
26941 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26942
26943 for (int i : {0, 1}) {
26944 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26945 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
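// Illustrative example (assuming VT = v4i32 and a target that accepts the mask):
//   s2v (add (extelt V, 2), 7) --> shuffle (add V, <7,7,7,7>), undef, {2,-1,-1,-1}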
26946 SDValue EE = Scalar.getOperand(i);
26947 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26948 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26949 EE.getOperand(0).getValueType() == VT &&
26950 isa<ConstantSDNode>(EE.getOperand(1))) {
26951 // Mask = {ExtractIndex, undef, undef....}
26952 ShufMask[0] = EE.getConstantOperandVal(1);
26953 // Make sure the shuffle is legal if we are crossing lanes.
26954 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26955 SDLoc DL(N);
26956 SDValue V[] = {EE.getOperand(0),
26957 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26958 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26959 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26960 ShufMask);
26961 }
26962 }
26963 }
26964 }
26965
26966 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26967 // with a VECTOR_SHUFFLE and possible truncate.
26968 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26969 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26970 return SDValue();
26971
26972 // If we have an implicit truncate, truncate here if it is legal.
26973 if (VecEltVT != Scalar.getValueType() &&
26974 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26975 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26976 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26977 }
26978
26979 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26980 if (!ExtIndexC)
26981 return SDValue();
26982
26983 SDValue SrcVec = Scalar.getOperand(0);
26984 EVT SrcVT = SrcVec.getValueType();
26985 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26986 unsigned VTNumElts = VT.getVectorNumElements();
26987 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26988 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26989 SmallVector<int, 8> Mask(SrcNumElts, -1);
26990 Mask[0] = ExtIndexC->getZExtValue();
26991 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26992 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26993 if (!LegalShuffle)
26994 return SDValue();
26995
26996 // If the initial vector is the same size, the shuffle is the result.
26997 if (VT == SrcVT)
26998 return LegalShuffle;
26999
27000 // If not, shorten the shuffled vector.
27001 if (VTNumElts != SrcNumElts) {
27002 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27003 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27004 SrcVT.getVectorElementType(), VTNumElts);
27005 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27006 ZeroIdx);
27007 }
27008 }
27009
27010 return SDValue();
27011}
27012
27013SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27014 EVT VT = N->getValueType(0);
27015 SDValue N0 = N->getOperand(0);
27016 SDValue N1 = N->getOperand(1);
27017 SDValue N2 = N->getOperand(2);
27018 uint64_t InsIdx = N->getConstantOperandVal(2);
27019
27020 // If inserting an UNDEF, just return the original vector.
27021 if (N1.isUndef())
27022 return N0;
27023
27024 // If this is an insert of an extracted vector into an undef vector, we can
27025 // just use the input to the extract if the types match, and can simplify
27026 // in some cases even if they don't.
27027 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27028 N1.getOperand(1) == N2) {
27029 EVT SrcVT = N1.getOperand(0).getValueType();
27030 if (SrcVT == VT)
27031 return N1.getOperand(0);
27032 // TODO: To remove the zero check, need to adjust the offset to
27033 // a multiple of the new src type.
27034 if (isNullConstant(N2)) {
27035 if (VT.knownBitsGE(SrcVT) &&
27036 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27037 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27038 VT, N0, N1.getOperand(0), N2);
27039 else if (VT.knownBitsLE(SrcVT) &&
27040 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27041 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27042 VT, N1.getOperand(0), N2);
27043 }
27044 }
27045
27046 // Handle case where we've ended up inserting back into the source vector
27047 // we extracted the subvector from.
27048 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27049 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27050 N1.getOperand(1) == N2)
27051 return N0;
27052
27053 // Simplify scalar inserts into an undef vector:
27054 // insert_subvector undef, (splat X), N2 -> splat X
27055 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27056 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27057 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27058
27059 // If we are inserting a bitcast value into an undef, with the same
27060 // number of elements, just use the bitcast input of the extract.
27061 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27062 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27063 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27064 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27065 N1.getOperand(0).getOperand(1) == N2 &&
27066 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27067 VT.getVectorElementCount() &&
27068 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27069 VT.getSizeInBits()) {
27070 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27071 }
27072
27073 // If both N0 and N1 are bitcast values on which insert_subvector
27074 // would make sense, pull the bitcast through.
27075 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27076 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27077 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27078 SDValue CN0 = N0.getOperand(0);
27079 SDValue CN1 = N1.getOperand(0);
27080 EVT CN0VT = CN0.getValueType();
27081 EVT CN1VT = CN1.getValueType();
27082 if (CN0VT.isVector() && CN1VT.isVector() &&
27083 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27085 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27086 CN0.getValueType(), CN0, CN1, N2);
27087 return DAG.getBitcast(VT, NewINSERT);
27088 }
27089 }
27090
27091 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27092 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27093 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27094 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27095 N0.getOperand(1).getValueType() == N1.getValueType() &&
27096 N0.getOperand(2) == N2)
27097 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27098 N1, N2);
27099
27100 // Eliminate an intermediate insert into an undef vector:
27101 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27102 // insert_subvector undef, X, 0
27103 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27104 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27105 isNullConstant(N2))
27106 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27107 N1.getOperand(1), N2);
27108
27109 // Push subvector bitcasts to the output, adjusting the index as we go.
27110 // insert_subvector(bitcast(v), bitcast(s), c1)
27111 // -> bitcast(insert_subvector(v, s, c2))
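// Illustrative example (assuming the target supports INSERT_SUBVECTOR on v8i32):
//   insert_subvector (bitcast v8i32 V to v16i16), (bitcast v2i32 S to v4i16), 8
//   -> bitcast (insert_subvector v8i32 V, v2i32 S, 4) to v16i16
// where the index is rescaled from v16i16 elements to v8i32 elements (8 / 2 = 4).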
27112 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27113 N1.getOpcode() == ISD::BITCAST) {
27114 SDValue N0Src = peekThroughBitcasts(N0);
27115 SDValue N1Src = peekThroughBitcasts(N1);
27116 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27117 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27118 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27119 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27120 EVT NewVT;
27121 SDLoc DL(N);
27122 SDValue NewIdx;
27123 LLVMContext &Ctx = *DAG.getContext();
27124 ElementCount NumElts = VT.getVectorElementCount();
27125 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27126 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27127 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27128 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27129 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27130 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27131 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27132 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27133 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27134 NumElts.divideCoefficientBy(Scale));
27135 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27136 }
27137 }
27138 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27139 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27140 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27141 return DAG.getBitcast(VT, Res);
27142 }
27143 }
27144 }
27145
27146 // Canonicalize insert_subvector dag nodes.
27147 // Example:
27148 // (insert_subvector (insert_subvector A, Idx0), Idx1)
27149 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
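// Illustrative example:
//   (insert_subvector (insert_subvector A, X, 4), Y, 0)
//   -> (insert_subvector (insert_subvector A, Y, 0), X, 4)
// so the insert with the smaller index ends up innermost.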
27150 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27151 N1.getValueType() == N0.getOperand(1).getValueType()) {
27152 unsigned OtherIdx = N0.getConstantOperandVal(2);
27153 if (InsIdx < OtherIdx) {
27154 // Swap nodes.
27155 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27156 N0.getOperand(0), N1, N2);
27157 AddToWorklist(NewOp.getNode());
27158 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27159 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27160 }
27161 }
27162
27163 // If the input vector is a concatenation, and the insert replaces
27164 // one of the pieces, we can optimize into a single concat_vectors.
27165 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27166 N0.getOperand(0).getValueType() == N1.getValueType() &&
27167 N0.getOperand(0).getValueType().isScalableVector() ==
27168 N1.getValueType().isScalableVector()) {
27169 unsigned Factor = N1.getValueType().getVectorMinNumElements();
27170 SmallVector<SDValue, 8> Ops(N0->ops());
27171 Ops[InsIdx / Factor] = N1;
27172 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27173 }
27174
27175 // Simplify source operands based on insertion.
27176 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27177 return SDValue(N, 0);
27178
27179 return SDValue();
27180}
27181
27182SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27183 SDValue N0 = N->getOperand(0);
27184
27185 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27186 if (N0->getOpcode() == ISD::FP16_TO_FP)
27187 return N0->getOperand(0);
27188
27189 return SDValue();
27190}
27191
27192SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27193 auto Op = N->getOpcode();
27195 "opcode should be FP16_TO_FP or BF16_TO_FP.");
27196 SDValue N0 = N->getOperand(0);
27197
27198 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27199 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27200 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27201 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27202 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27203 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27204 }
27205 }
27206
27207 // Sometimes constants manage to survive very late in the pipeline, e.g.,
27208 // because they are wrapped inside the <1 x f16> type. Try one last time to
27209 // get rid of them.
27210 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27211 N->getValueType(0), {N0});
27212 return Folded;
27213}
27214
27215SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
27216 SDValue N0 = N->getOperand(0);
27217
27218 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
27219 if (N0->getOpcode() == ISD::BF16_TO_FP)
27220 return N0->getOperand(0);
27221
27222 return SDValue();
27223}
27224
27225SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
27226 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27227 return visitFP16_TO_FP(N);
27228}
27229
27230SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
27231 SDValue N0 = N->getOperand(0);
27232 EVT VT = N0.getValueType();
27233 unsigned Opcode = N->getOpcode();
27234
27235 // VECREDUCE over 1-element vector is just an extract.
27236 if (VT.getVectorElementCount().isScalar()) {
27237 SDLoc dl(N);
27238 SDValue Res =
27239 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
27240 DAG.getVectorIdxConstant(0, dl));
27241 if (Res.getValueType() != N->getValueType(0))
27242 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
27243 return Res;
27244 }
27245
27246 // On a boolean vector, an and/or reduction is the same as a umin/umax
27247 // reduction. Convert them if the latter is legal while the former isn't.
27248 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
27249 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
27250 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
27251 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
27252 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
27253 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
27254 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
27255 }
27256
27257 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
27258 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
27259 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27260 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
27261 SDValue Vec = N0.getOperand(0);
27262 SDValue Subvec = N0.getOperand(1);
27263 if ((Opcode == ISD::VECREDUCE_OR &&
27264 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
27265 (Opcode == ISD::VECREDUCE_AND &&
27266 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
27267 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
27268 }
27269
27270 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
27271 // Same for zext and anyext, and for and/or/xor reductions.
27272 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
27273 Opcode == ISD::VECREDUCE_XOR) &&
27274 (N0.getOpcode() == ISD::SIGN_EXTEND ||
27275 N0.getOpcode() == ISD::ZERO_EXTEND ||
27276 N0.getOpcode() == ISD::ANY_EXTEND) &&
27277 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
27278 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
27279 N0.getOperand(0).getValueType().getVectorElementType(),
27280 N0.getOperand(0));
27281 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
27282 }
27283 return SDValue();
27284}
27285
27286SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
27287 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27288
27289 // FSUB -> FMA combines:
27290 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
27291 AddToWorklist(Fused.getNode());
27292 return Fused;
27293 }
27294 return SDValue();
27295}
27296
27297SDValue DAGCombiner::visitVPOp(SDNode *N) {
27298
27299 if (N->getOpcode() == ISD::VP_GATHER)
27300 if (SDValue SD = visitVPGATHER(N))
27301 return SD;
27302
27303 if (N->getOpcode() == ISD::VP_SCATTER)
27304 if (SDValue SD = visitVPSCATTER(N))
27305 return SD;
27306
27307 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
27308 if (SDValue SD = visitVP_STRIDED_LOAD(N))
27309 return SD;
27310
27311 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
27312 if (SDValue SD = visitVP_STRIDED_STORE(N))
27313 return SD;
27314
27315 // VP operations in which all vector elements are disabled - either by
27316 // determining that the mask is all false or that the EVL is 0 - can be
27317 // eliminated.
27318 bool AreAllEltsDisabled = false;
27319 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
27320 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
27321 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
27322 AreAllEltsDisabled |=
27323 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
27324
27325 // This is the only generic VP combine we support for now.
27326 if (!AreAllEltsDisabled) {
27327 switch (N->getOpcode()) {
27328 case ISD::VP_FADD:
27329 return visitVP_FADD(N);
27330 case ISD::VP_FSUB:
27331 return visitVP_FSUB(N);
27332 case ISD::VP_FMA:
27333 return visitFMA<VPMatchContext>(N);
27334 case ISD::VP_SELECT:
27335 return visitVP_SELECT(N);
27336 case ISD::VP_MUL:
27337 return visitMUL<VPMatchContext>(N);
27338 case ISD::VP_SUB:
27339 return foldSubCtlzNot<VPMatchContext>(N, DAG);
27340 default:
27341 break;
27342 }
27343 return SDValue();
27344 }
27345
27346 // Binary operations can be replaced by UNDEF.
27347 if (ISD::isVPBinaryOp(N->getOpcode()))
27348 return DAG.getUNDEF(N->getValueType(0));
27349
27350 // VP Memory operations can be replaced by either the chain (stores) or the
27351 // chain + undef (loads).
27352 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
27353 if (MemSD->writeMem())
27354 return MemSD->getChain();
27355 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
27356 }
27357
27358 // Reduction operations return the start operand when no elements are active.
27359 if (ISD::isVPReduction(N->getOpcode()))
27360 return N->getOperand(0);
27361
27362 return SDValue();
27363}
27364
27365SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
27366 SDValue Chain = N->getOperand(0);
27367 SDValue Ptr = N->getOperand(1);
27368 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27369
27370 // Check that the memory where the FP state is written is used only in a
27371 // single load operation.
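// The overall rewrite is (illustrative):
//   get_fpenv_mem Tmp; X = load Tmp; store X -> Dst ==> get_fpenv_mem Dst
// i.e. the FP environment is written straight to its final destination and the
// intermediate load/store pair becomes dead.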
27372 LoadSDNode *LdNode = nullptr;
27373 for (auto *U : Ptr->users()) {
27374 if (U == N)
27375 continue;
27376 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
27377 if (LdNode && LdNode != Ld)
27378 return SDValue();
27379 LdNode = Ld;
27380 continue;
27381 }
27382 return SDValue();
27383 }
27384 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27385 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27386 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
27387 return SDValue();
27388
27389 // Check if the loaded value is used only in a store operation.
27390 StoreSDNode *StNode = nullptr;
27391 for (SDUse &U : LdNode->uses()) {
27392 if (U.getResNo() == 0) {
27393 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
27394 if (StNode)
27395 return SDValue();
27396 StNode = St;
27397 } else {
27398 return SDValue();
27399 }
27400 }
27401 }
27402 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27403 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27404 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27405 return SDValue();
27406
27407 // Create new node GET_FPENV_MEM, which uses the store address to write FP
27408 // environment.
27409 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
27410 StNode->getMemOperand());
27411 CombineTo(StNode, Res, false);
27412 return Res;
27413}
27414
27415SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
27416 SDValue Chain = N->getOperand(0);
27417 SDValue Ptr = N->getOperand(1);
27418 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
27419
27420 // Check that the address of the FP state is otherwise used only in a single store operation.
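// The overall rewrite is (illustrative):
//   X = load Src; store X -> Tmp; set_fpenv_mem Tmp ==> set_fpenv_mem Src
// i.e. the FP environment is read straight from the original source and the
// intermediate load/store pair becomes dead.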
27421 StoreSDNode *StNode = nullptr;
27422 for (auto *U : Ptr->users()) {
27423 if (U == N)
27424 continue;
27425 if (auto *St = dyn_cast<StoreSDNode>(U)) {
27426 if (StNode && StNode != St)
27427 return SDValue();
27428 StNode = St;
27429 continue;
27430 }
27431 return SDValue();
27432 }
27433 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
27434 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
27435 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
27436 return SDValue();
27437
27438 // Check if the stored value is loaded from some location and the loaded
27439 // value is used only in the store operation.
27440 SDValue StValue = StNode->getValue();
27441 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
27442 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
27443 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
27444 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
27445 return SDValue();
27446
27447 // Create new node SET_FPENV_MEM, which uses the load address to read FP
27448 // environment.
27449 SDValue Res =
27450 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
27451 LdNode->getMemOperand());
27452 return Res;
27453}
27454
27455 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
27456/// with the destination vector and a zero vector.
27457/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
27458/// vector_shuffle V, Zero, <0, 4, 2, 4>
27459SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
27460 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
27461
27462 EVT VT = N->getValueType(0);
27463 SDValue LHS = N->getOperand(0);
27464 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
27465 SDLoc DL(N);
27466
27467 // Make sure we're not running after operation legalization where it
27468 // may have custom lowered the vector shuffles.
27469 if (LegalOperations)
27470 return SDValue();
27471
27472 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
27473 return SDValue();
27474
27475 EVT RVT = RHS.getValueType();
27476 unsigned NumElts = RHS.getNumOperands();
27477
27478 // Attempt to create a valid clear mask, splitting the mask into
27479 // sub elements and checking to see if each is
27480 // all zeros or all ones - suitable for shuffle masking.
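// Illustrative example (assuming a little-endian target that accepts the
// resulting v4i32 clear mask):
//   v2i64 AND X, <0x00000000FFFFFFFF, 0xFFFFFFFF00000000>
//   --> bitcast (shuffle (bitcast X to v4i32), zero, <0, 5, 6, 3>) to v2i64
// where mask indices >= 4 select zero sub-elements.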
27481 auto BuildClearMask = [&](int Split) {
27482 int NumSubElts = NumElts * Split;
27483 int NumSubBits = RVT.getScalarSizeInBits() / Split;
27484
27485 SmallVector<int, 8> Indices;
27486 for (int i = 0; i != NumSubElts; ++i) {
27487 int EltIdx = i / Split;
27488 int SubIdx = i % Split;
27489 SDValue Elt = RHS.getOperand(EltIdx);
27490 // X & undef --> 0 (not undef). So this lane must be converted to choose
27491 // from the zero constant vector (same as if the element had all 0-bits).
27492 if (Elt.isUndef()) {
27493 Indices.push_back(i + NumSubElts);
27494 continue;
27495 }
27496
27497 APInt Bits;
27498 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
27499 Bits = Cst->getAPIntValue();
27500 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
27501 Bits = CstFP->getValueAPF().bitcastToAPInt();
27502 else
27503 return SDValue();
27504
27505 // Extract the sub element from the constant bit mask.
27506 if (DAG.getDataLayout().isBigEndian())
27507 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
27508 else
27509 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
27510
27511 if (Bits.isAllOnes())
27512 Indices.push_back(i);
27513 else if (Bits == 0)
27514 Indices.push_back(i + NumSubElts);
27515 else
27516 return SDValue();
27517 }
27518
27519 // Let's see if the target supports this vector_shuffle.
27520 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
27521 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
27522 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
27523 return SDValue();
27524
27525 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
27526 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
27527 DAG.getBitcast(ClearVT, LHS),
27528 Zero, Indices));
27529 };
27530
27531 // Determine maximum split level (byte level masking).
27532 int MaxSplit = 1;
27533 if (RVT.getScalarSizeInBits() % 8 == 0)
27534 MaxSplit = RVT.getScalarSizeInBits() / 8;
27535
27536 for (int Split = 1; Split <= MaxSplit; ++Split)
27537 if (RVT.getScalarSizeInBits() % Split == 0)
27538 if (SDValue S = BuildClearMask(Split))
27539 return S;
27540
27541 return SDValue();
27542}
27543
27544/// If a vector binop is performed on splat values, it may be profitable to
27545/// extract, scalarize, and insert/splat.
27546 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
27547 const SDLoc &DL, bool LegalTypes) {
27548 SDValue N0 = N->getOperand(0);
27549 SDValue N1 = N->getOperand(1);
27550 unsigned Opcode = N->getOpcode();
27551 EVT VT = N->getValueType(0);
27552 EVT EltVT = VT.getVectorElementType();
27553 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27554
27555 // TODO: Remove/replace the extract cost check? If the elements are available
27556 // as scalars, then there may be no extract cost. Should we ask if
27557 // inserting a scalar back into a vector is cheap instead?
27558 int Index0, Index1;
27559 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27560 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
27561 // Extracting an element from a splat_vector should be free.
27562 // TODO: use DAG.isSplatValue instead?
27563 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
27564 N1.getOpcode() == ISD::SPLAT_VECTOR;
27565 if (!Src0 || !Src1 || Index0 != Index1 ||
27566 Src0.getValueType().getVectorElementType() != EltVT ||
27567 Src1.getValueType().getVectorElementType() != EltVT ||
27568 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27569 // If before type legalization, allow scalar types that will eventually be
27570 // made legal.
27571 !TLI.isOperationLegalOrCustom(
27572 Opcode, LegalTypes
27573 ? EltVT
27574 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
27575 return SDValue();
27576
27577 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
27578 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
27579 return SDValue();
27580
27581 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
27582 // All but one element should have an undef input, which will fold to a
27583 // constant or undef. Avoid splatting which would over-define potentially
27584 // undefined elements.
27585
27586 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27587 // build_vec ..undef, (bo X, Y), undef...
27588 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
27589 DAG.ExtractVectorElements(Src0, EltsX);
27590 DAG.ExtractVectorElements(Src1, EltsY);
27591
27592 for (auto [X, Y] : zip(EltsX, EltsY))
27593 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
27594 return DAG.getBuildVector(VT, DL, EltsResult);
27595 }
27596
27597 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27598 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27599 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27600 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27601
27602 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27603 return DAG.getSplat(VT, DL, ScalarBO);
27604}
27605
27606/// Visit a vector cast operation, like FP_EXTEND.
27607SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27608 EVT VT = N->getValueType(0);
27609 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27610 EVT EltVT = VT.getVectorElementType();
27611 unsigned Opcode = N->getOpcode();
27612
27613 SDValue N0 = N->getOperand(0);
27614 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27615
27616 // TODO: promoting the operation might also be good here?
27617 int Index0;
27618 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27619 if (Src0 &&
27620 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27621 TLI.isExtractVecEltCheap(VT, Index0)) &&
27622 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27623 TLI.preferScalarizeSplat(N)) {
27624 EVT SrcVT = N0.getValueType();
27625 EVT SrcEltVT = SrcVT.getVectorElementType();
27626 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27627 SDValue Elt =
27628 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27629 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27630 if (VT.isScalableVector())
27631 return DAG.getSplatVector(VT, DL, ScalarBO);
27632 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), ScalarBO);
27633 return DAG.getBuildVector(VT, DL, Ops);
27634 }
27635
27636 return SDValue();
27637}
27638
27639/// Visit a binary vector operation, like ADD.
27640SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27641 EVT VT = N->getValueType(0);
27642 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27643
27644 SDValue LHS = N->getOperand(0);
27645 SDValue RHS = N->getOperand(1);
27646 unsigned Opcode = N->getOpcode();
27647 SDNodeFlags Flags = N->getFlags();
27648
27649 // Move unary shuffles with identical masks after a vector binop:
27650 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27651 // --> shuffle (VBinOp A, B), Undef, Mask
27652 // This does not require type legality checks because we are creating the
27653 // same types of operations that are in the original sequence. We do have to
27654 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27655 // though. This code is adapted from the identical transform in instcombine.
27656 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27657 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27658 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27659 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27660 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27661 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27662 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27663 RHS.getOperand(0), Flags);
27664 SDValue UndefV = LHS.getOperand(1);
27665 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27666 }
27667
27668 // Try to sink a splat shuffle after a binop with a uniform constant.
27669 // This is limited to cases where neither the shuffle nor the constant have
27670 // undefined elements because that could be poison-unsafe or inhibit
27671 // demanded elements analysis. It is further limited to not change a splat
27672 // of an inserted scalar because that may be optimized better by
27673 // load-folding or other target-specific behaviors.
27674 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27675 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27676 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27677 // binop (splat X), (splat C) --> splat (binop X, C)
27678 SDValue X = Shuf0->getOperand(0);
27679 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27680 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27681 Shuf0->getMask());
27682 }
27683 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27684 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27685 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27686 // binop (splat C), (splat X) --> splat (binop C, X)
27687 SDValue X = Shuf1->getOperand(0);
27688 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27689 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27690 Shuf1->getMask());
27691 }
27692 }
27693
27694 // The following pattern is likely to emerge with vector reduction ops. Moving
27695 // the binary operation ahead of insertion may allow using a narrower vector
27696 // instruction that has better performance than the wide version of the op:
27697 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27698 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27699 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27700 LHS.getOperand(2) == RHS.getOperand(2) &&
27701 (LHS.hasOneUse() || RHS.hasOneUse())) {
27702 SDValue X = LHS.getOperand(1);
27703 SDValue Y = RHS.getOperand(1);
27704 SDValue Z = LHS.getOperand(2);
27705 EVT NarrowVT = X.getValueType();
27706 if (NarrowVT == Y.getValueType() &&
27707 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27708 LegalOperations)) {
27709 // (binop undef, undef) may not return undef, so compute that result.
27710 SDValue VecC =
27711 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27712 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27713 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27714 }
27715 }
27716
27717 // Make sure all but the first op are undef or constant.
27718 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27719 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27720 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27721 return Op.isUndef() ||
27722 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27723 });
27724 };
27725
27726 // The following pattern is likely to emerge with vector reduction ops. Moving
27727 // the binary operation ahead of the concat may allow using a narrower vector
27728 // instruction that has better performance than the wide version of the op:
27729 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27730 // concat (VBinOp X, Y), VecC
27731 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27732 (LHS.hasOneUse() || RHS.hasOneUse())) {
27733 EVT NarrowVT = LHS.getOperand(0).getValueType();
27734 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27735 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27736 unsigned NumOperands = LHS.getNumOperands();
27737 SmallVector<SDValue, 4> ConcatOps;
27738 for (unsigned i = 0; i != NumOperands; ++i) {
27739 // This constant-folds for operands 1 and up.
27740 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27741 RHS.getOperand(i)));
27742 }
27743
27744 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27745 }
27746 }
27747
27748 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
27749 return V;
27750
27751 return SDValue();
27752}
27753
27754SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27755 SDValue N2) {
27756 assert(N0.getOpcode() == ISD::SETCC &&
27757 "First argument must be a SetCC node!");
27758
27759 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27760 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27761
27762 // If we got a simplified select_cc node back from SimplifySelectCC, then
27763 // break it down into a new SETCC node, and a new SELECT node, and then return
27764 // the SELECT node, since we were called with a SELECT node.
27765 if (SCC.getNode()) {
27766 // Check to see if we got a select_cc back (to turn into setcc/select).
27767 // Otherwise, just return whatever node we got back, like fabs.
27768 if (SCC.getOpcode() == ISD::SELECT_CC) {
27769 const SDNodeFlags Flags = N0->getFlags();
27770 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27771 N0.getValueType(),
27772 SCC.getOperand(0), SCC.getOperand(1),
27773 SCC.getOperand(4), Flags);
27774 AddToWorklist(SETCC.getNode());
27775 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27776 SCC.getOperand(2), SCC.getOperand(3));
27777 SelectNode->setFlags(Flags);
27778 return SelectNode;
27779 }
27780
27781 return SCC;
27782 }
27783 return SDValue();
27784}
27785
27786/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27787/// being selected between, see if we can simplify the select. Callers of this
27788/// should assume that TheSelect is deleted if this returns true. As such, they
27789/// should return the appropriate thing (e.g. the node) back to the top-level of
27790/// the DAG combiner loop to avoid it being looked at.
27791bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27792 SDValue RHS) {
27793 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27794 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27795 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27796 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27797 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27798 SDValue Sqrt = RHS;
27799 ISD::CondCode CC;
27800 SDValue CmpLHS;
27801 const ConstantFPSDNode *Zero = nullptr;
27802
27803 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27804 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27805 CmpLHS = TheSelect->getOperand(0);
27806 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27807 } else {
27808 // SELECT or VSELECT
27809 SDValue Cmp = TheSelect->getOperand(0);
27810 if (Cmp.getOpcode() == ISD::SETCC) {
27811 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27812 CmpLHS = Cmp.getOperand(0);
27813 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27814 }
27815 }
27816 if (Zero && Zero->isZero() &&
27817 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27818 CC == ISD::SETULT || CC == ISD::SETLT)) {
27819 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27820 CombineTo(TheSelect, Sqrt);
27821 return true;
27822 }
27823 }
27824 }
27825 // Cannot simplify select with vector condition
27826 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27827
27828 // If this is a select from two identical things, try to pull the operation
27829 // through the select.
27830 if (LHS.getOpcode() != RHS.getOpcode() ||
27831 !LHS.hasOneUse() || !RHS.hasOneUse())
27832 return false;
27833
27834 // If this is a load and the token chain is identical, replace the select
27835 // of two loads with a load through a select of the address to load from.
27836 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27837 // constants have been dropped into the constant pool.
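// i.e. (illustrative, subject to the restrictions checked below):
//   select C, (load p), (load q) --> load (select C, p, q)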
27838 if (LHS.getOpcode() == ISD::LOAD) {
27839 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27840 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27841
27842 // Token chains must be identical.
27843 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27844 // Do not let this transformation reduce the number of volatile loads.
27845 // Be conservative for atomics for the moment
27846 // TODO: This does appear to be legal for unordered atomics (see D66309)
27847 !LLD->isSimple() || !RLD->isSimple() ||
27848 // FIXME: If either is a pre/post inc/dec load,
27849 // we'd need to split out the address adjustment.
27850 LLD->isIndexed() || RLD->isIndexed() ||
27851 // If this is an EXTLOAD, the VT's must match.
27852 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27853 // If this is an EXTLOAD, the kind of extension must match.
27854 (LLD->getExtensionType() != RLD->getExtensionType() &&
27855 // The only exception is if one of the extensions is anyext.
27856 LLD->getExtensionType() != ISD::EXTLOAD &&
27857 RLD->getExtensionType() != ISD::EXTLOAD) ||
27858 // FIXME: this discards src value information. This is
27859 // over-conservative. It would be beneficial to be able to remember
27860 // both potential memory locations. Since we are discarding
27861 // src value info, don't do the transformation if the memory
27862 // locations are not in the default address space.
27863 LLD->getPointerInfo().getAddrSpace() != 0 ||
27864 RLD->getPointerInfo().getAddrSpace() != 0 ||
27865 // We can't produce a CMOV of a TargetFrameIndex since we won't
27866 // generate the address generation required.
27867 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27868 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27869 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27870 LLD->getBasePtr().getValueType()))
27871 return false;
27872
27873 // The loads must not depend on one another.
27874 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27875 return false;
27876
27877 // Check that the select condition doesn't reach either load. If so,
27878 // folding this will induce a cycle into the DAG. If not, this is safe to
27879 // xform, so create a select of the addresses.
27880
27881 SmallPtrSet<const SDNode *, 32> Visited;
27882 SmallVector<const SDNode *, 16> Worklist;
27883
27884 // Always fail if LLD and RLD are not independent. TheSelect is a
27885 // predecessor to all Nodes in question so we need not search past it.
27886
27887 Visited.insert(TheSelect);
27888 Worklist.push_back(LLD);
27889 Worklist.push_back(RLD);
27890
27891 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27892 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27893 return false;
27894
27895 SDValue Addr;
27896 if (TheSelect->getOpcode() == ISD::SELECT) {
27897 // We cannot do this optimization if any pair of {RLD, LLD} is a
27898 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27899 // Loads, we only need to check if CondNode is a successor to one of the
27900 // loads. We can further avoid this if there's no use of their chain
27901 // value.
27902 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27903 Worklist.push_back(CondNode);
27904
27905 if ((LLD->hasAnyUseOfValue(1) &&
27906 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27907 (RLD->hasAnyUseOfValue(1) &&
27908 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27909 return false;
27910
27911 Addr = DAG.getSelect(SDLoc(TheSelect),
27912 LLD->getBasePtr().getValueType(),
27913 TheSelect->getOperand(0), LLD->getBasePtr(),
27914 RLD->getBasePtr());
27915 } else { // Otherwise SELECT_CC
27916 // We cannot do this optimization if any pair of {RLD, LLD} is a
27917 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27918 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27919 // one of the loads. We can further avoid this if there's no use of their
27920 // chain value.
27921
27922 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27923 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27924 Worklist.push_back(CondLHS);
27925 Worklist.push_back(CondRHS);
27926
27927 if ((LLD->hasAnyUseOfValue(1) &&
27928 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27929 (RLD->hasAnyUseOfValue(1) &&
27930 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27931 return false;
27932
27933 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27934 LLD->getBasePtr().getValueType(),
27935 TheSelect->getOperand(0),
27936 TheSelect->getOperand(1),
27937 LLD->getBasePtr(), RLD->getBasePtr(),
27938 TheSelect->getOperand(4));
27939 }
27940
27941 SDValue Load;
27942 // It is safe to replace the two loads if they have different alignments,
27943 // but the new load must be the minimum (most restrictive) alignment of the
27944 // inputs.
27945 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27946 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27947 if (!RLD->isInvariant())
27948 MMOFlags &= ~MachineMemOperand::MOInvariant;
27949 if (!RLD->isDereferenceable())
27950 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27951 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27952 // FIXME: Discards pointer and AA info.
27953 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27954 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27955 MMOFlags);
27956 } else {
27957 // FIXME: Discards pointer and AA info.
27958 Load = DAG.getExtLoad(
27959 (LLD->getExtensionType() == ISD::EXTLOAD) ? RLD->getExtensionType()
27960 : LLD->getExtensionType(),
27961 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27962 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27963 }
27964
27965 // Users of the select now use the result of the load.
27966 CombineTo(TheSelect, Load);
27967
27968 // Users of the old loads now use the new load's chain. We know the
27969 // old-load value is dead now.
27970 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27971 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27972 return true;
27973 }
27974
27975 return false;
27976}
27977
27978/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27979/// bitwise 'and'.
27980SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27981 SDValue N1, SDValue N2, SDValue N3,
27982 ISD::CondCode CC) {
27983 // If this is a select where the false operand is zero and the compare is a
27984 // check of the sign bit, see if we can perform the "gzip trick":
27985 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27986 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
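// Illustrative i32 example: select_cc setlt X, 0, A, 0 -> and (sra X, 31), A,
// because (sra X, 31) is all-ones exactly when X is negative.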
27987 EVT XType = N0.getValueType();
27988 EVT AType = N2.getValueType();
27989 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27990 return SDValue();
27991
27992 // If the comparison is testing for a positive value, we have to invert
27993 // the sign bit mask, so only do that transform if the target has a bitwise
27994 // 'and not' instruction (the invert is free).
27995 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27996 // (X > -1) ? A : 0
27997 // (X > 0) ? X : 0 <-- This is canonical signed max.
27998 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27999 return SDValue();
28000 } else if (CC == ISD::SETLT) {
28001 // (X < 0) ? A : 0
28002 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28003 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28004 return SDValue();
28005 } else {
28006 return SDValue();
28007 }
28008
28009 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28010 // constant.
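// Illustrative i32 example with A = 8 (only bit 3 set): ShCt = 32 - 3 - 1 = 28,
// so select_cc setlt X, 0, 8, 0 -> and (srl X, 28), 8, which moves the sign bit
// of X directly into bit 3.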
28011 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28012 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28013 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28014 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28015 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28016 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28017 AddToWorklist(Shift.getNode());
28018
28019 if (XType.bitsGT(AType)) {
28020 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28021 AddToWorklist(Shift.getNode());
28022 }
28023
28024 if (CC == ISD::SETGT)
28025 Shift = DAG.getNOT(DL, Shift, AType);
28026
28027 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28028 }
28029 }
28030
28031 unsigned ShCt = XType.getSizeInBits() - 1;
28032 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28033 return SDValue();
28034
28035 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28036 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28037 AddToWorklist(Shift.getNode());
28038
28039 if (XType.bitsGT(AType)) {
28040 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28041 AddToWorklist(Shift.getNode());
28042 }
28043
28044 if (CC == ISD::SETGT)
28045 Shift = DAG.getNOT(DL, Shift, AType);
28046
28047 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28048}
28049
28050// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28051SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28052 SDValue N0 = N->getOperand(0);
28053 SDValue N1 = N->getOperand(1);
28054 SDValue N2 = N->getOperand(2);
28055 SDLoc DL(N);
28056
28057 unsigned BinOpc = N1.getOpcode();
28058 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28059 (N1.getResNo() != N2.getResNo()))
28060 return SDValue();
28061
28062 // The use checks are intentionally on SDNode because we may be dealing
28063 // with opcodes that produce more than one SDValue.
28064 // TODO: Do we really need to check N0 (the condition operand of the select)?
28065 // But removing that clause could cause an infinite loop...
28066 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28067 return SDValue();
28068
28069 // Binops may include opcodes that return multiple values, so all values
28070 // must be created/propagated from the newly created binops below.
28071 SDVTList OpVTs = N1->getVTList();
28072
28073 // Fold select(cond, binop(x, y), binop(z, y))
28074 // --> binop(select(cond, x, z), y)
28075 if (N1.getOperand(1) == N2.getOperand(1)) {
28076 SDValue N10 = N1.getOperand(0);
28077 SDValue N20 = N2.getOperand(0);
28078 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28079 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
28080 NewBinOp->setFlags(N1->getFlags());
28081 NewBinOp->intersectFlagsWith(N2->getFlags());
28082 return SDValue(NewBinOp.getNode(), N1.getResNo());
28083 }
28084
28085 // Fold select(cond, binop(x, y), binop(x, z))
28086 // --> binop(x, select(cond, y, z))
28087 if (N1.getOperand(0) == N2.getOperand(0)) {
28088 SDValue N11 = N1.getOperand(1);
28089 SDValue N21 = N2.getOperand(1);
28090 // Second op VT might be different (e.g. shift amount type)
28091 if (N11.getValueType() == N21.getValueType()) {
28092 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28093 SDValue NewBinOp =
28094 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
28095 NewBinOp->setFlags(N1->getFlags());
28096 NewBinOp->intersectFlagsWith(N2->getFlags());
28097 return SDValue(NewBinOp.getNode(), N1.getResNo());
28098 }
28099 }
28100
28101 // TODO: Handle isCommutativeBinOp patterns as well?
28102 return SDValue();
28103}
28104
28105// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28106SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28107 SDValue N0 = N->getOperand(0);
28108 EVT VT = N->getValueType(0);
28109 bool IsFabs = N->getOpcode() == ISD::FABS;
28110 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28111
28112 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28113 return SDValue();
28114
28115 SDValue Int = N0.getOperand(0);
28116 EVT IntVT = Int.getValueType();
28117
28118 // The operand to cast should be integer.
28119 if (!IntVT.isInteger() || IntVT.isVector())
28120 return SDValue();
28121
28122 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28123 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28124 APInt SignMask;
28125 if (N0.getValueType().isVector()) {
28126 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28127 // 0x7f...) per element and splat it.
28128    SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28129    if (IsFabs)
28130 SignMask = ~SignMask;
28131 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28132 } else {
28133 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28134 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28135 if (IsFabs)
28136 SignMask = ~SignMask;
28137 }
28138 SDLoc DL(N0);
28139 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28140 DAG.getConstant(SignMask, DL, IntVT));
28141 AddToWorklist(Int.getNode());
28142 return DAG.getBitcast(VT, Int);
28143}
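// Illustrative instance of the transform above, assuming f32/i32 and a target
// where fneg/fabs are not free:
//   (fneg (bitcast i32 X to f32)) -> (bitcast (xor X, 0x80000000) to f32)
//   (fabs (bitcast i32 X to f32)) -> (bitcast (and X, 0x7fffffff) to f32)
// so the FP sign manipulation becomes a plain integer bit operation on the
// bitcast source.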
28144
28145/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28146/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28147/// in it. This may be a win when the constant is not otherwise available
28148/// because it replaces two constant pool loads with one.
28149SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28150 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28151 ISD::CondCode CC) {
28152  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28153    return SDValue();
28154
28155 // If we are before legalize types, we want the other legalization to happen
28156 // first (for example, to avoid messing with soft float).
28157 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28158 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28159 EVT VT = N2.getValueType();
28160 if (!TV || !FV || !TLI.isTypeLegal(VT))
28161 return SDValue();
28162
28163 // If a constant can be materialized without loads, this does not make sense.
28164  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28165      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28166 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28167 return SDValue();
28168
28169 // If both constants have multiple uses, then we won't need to do an extra
28170 // load. The values are likely around in registers for other users.
28171 if (!TV->hasOneUse() && !FV->hasOneUse())
28172 return SDValue();
28173
28174 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28175 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28176 Type *FPTy = Elts[0]->getType();
28177 const DataLayout &TD = DAG.getDataLayout();
28178
28179 // Create a ConstantArray of the two constants.
28180 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28181 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28182 TD.getPrefTypeAlign(FPTy));
28183 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28184
28185 // Get offsets to the 0 and 1 elements of the array, so we can select between
28186 // them.
28187 SDValue Zero = DAG.getIntPtrConstant(0, DL);
28188 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28189 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28190 SDValue Cond =
28191 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28192 AddToWorklist(Cond.getNode());
28193 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28194 AddToWorklist(CstOffset.getNode());
28195 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28196 AddToWorklist(CPIdx.getNode());
28197 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28198                     MachinePointerInfo::getConstantPool(
28199                         DAG.getMachineFunction()), Alignment);
28200}
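// Sketch of the result for "(a < b) ? 1.0f : 2.0f" (assuming a 4-byte f32
// allocation size): the constant pool entry holds { 2.0f, 1.0f } with the
// false value first, and the select collapses to address arithmetic:
//   Offset = (a < b) ? 4 : 0
//   Result = load (CPIdx + Offset)
// leaving a single load instead of two constant pool loads.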
28201
28202/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28203/// where 'cond' is the comparison specified by CC.
28204SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28205                                      SDValue N2, SDValue N3, ISD::CondCode CC,
28206                                      bool NotExtCompare) {
28207 // (x ? y : y) -> y.
28208 if (N2 == N3) return N2;
28209
28210 EVT CmpOpVT = N0.getValueType();
28211 EVT CmpResVT = getSetCCResultType(CmpOpVT);
28212 EVT VT = N2.getValueType();
28213 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
28214 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28215 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
28216
28217 // Determine if the condition we're dealing with is constant.
28218 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
28219 AddToWorklist(SCC.getNode());
28220 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
28221 // fold select_cc true, x, y -> x
28222 // fold select_cc false, x, y -> y
28223 return !(SCCC->isZero()) ? N2 : N3;
28224 }
28225 }
28226
28227 if (SDValue V =
28228 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
28229 return V;
28230
28231 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
28232 return V;
28233
28234 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
28235  // where y has a single bit set.
28236  // In plain terms, we can turn the SELECT_CC into an AND
28237 // when the condition can be materialized as an all-ones register. Any
28238 // single bit-test can be materialized as an all-ones register with
28239 // shift-left and shift-right-arith.
28240 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
28241 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
28242 SDValue AndLHS = N0->getOperand(0);
28243 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
28244 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
28245 // Shift the tested bit over the sign bit.
28246 const APInt &AndMask = ConstAndRHS->getAPIntValue();
28247 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
28248 unsigned ShCt = AndMask.getBitWidth() - 1;
28249 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
28250 SDLoc(AndLHS));
28251 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
28252
28253 // Now arithmetic right shift it all the way over, so the result is
28254 // either all-ones, or zero.
28255 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
28256 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
28257
28258 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
28259 }
28260 }
28261 }
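  // Worked instance of the fold above (i32, for illustration): for
  //   select_cc seteq (and x, 4), 0, 0, A
  // the tested bit is bit 2, so countl_zero(4) == 29 and the result is
  //   (and (sra (shl x, 29), 31), A)
  // i.e. bit 2 is moved into the sign position and smeared into an all-ones
  // or all-zeros mask that selects A or 0.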
28262
28263 // fold select C, 16, 0 -> shl C, 4
28264 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
28265 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
28266
28267 if ((Fold || Swap) &&
28268 TLI.getBooleanContents(CmpOpVT) ==
28269          TargetLowering::ZeroOrOneBooleanContent &&
28270      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
28271
28272 if (Swap) {
28273 CC = ISD::getSetCCInverse(CC, CmpOpVT);
28274 std::swap(N2C, N3C);
28275 }
28276
28277 // If the caller doesn't want us to simplify this into a zext of a compare,
28278 // don't do it.
28279 if (NotExtCompare && N2C->isOne())
28280 return SDValue();
28281
28282 SDValue Temp, SCC;
28283 // zext (setcc n0, n1)
28284 if (LegalTypes) {
28285 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
28286 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
28287 } else {
28288 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
28289 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
28290 }
28291
28292 AddToWorklist(SCC.getNode());
28293 AddToWorklist(Temp.getNode());
28294
28295 if (N2C->isOne())
28296 return Temp;
28297
28298 unsigned ShCt = N2C->getAPIntValue().logBase2();
28299 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
28300 return SDValue();
28301
28302 // shl setcc result by log2 n2c
28303 return DAG.getNode(
28304 ISD::SHL, DL, N2.getValueType(), Temp,
28305 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
28306 }
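  // Example of the power-of-two fold above: with ZeroOrOneBooleanContent,
  //   select_cc setlt X, Y, 16, 0
  // becomes
  //   shl (zext (setcc X, Y, setlt)), 4
  // because the zero-extended i1 result is 0 or 1 and shifting left by
  // log2(16) == 4 yields 0 or 16.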
28307
28308 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
28309 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
28310 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
28311 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
28312 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
28313 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
28314 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
28315 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
28316 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
28317 SDValue ValueOnZero = N2;
28318 SDValue Count = N3;
28319    // If the condition is NE instead of EQ, swap the operands.
28320 if (CC == ISD::SETNE)
28321 std::swap(ValueOnZero, Count);
28322 // Check if the value on zero is a constant equal to the bits in the type.
28323 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
28324 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
28325 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
28326 // legal, combine to just cttz.
28327 if ((Count.getOpcode() == ISD::CTTZ ||
28328 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
28329 N0 == Count.getOperand(0) &&
28330 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
28331 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
28332 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
28333 // legal, combine to just ctlz.
28334 if ((Count.getOpcode() == ISD::CTLZ ||
28335 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
28336 N0 == Count.getOperand(0) &&
28337 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
28338 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
28339 }
28340 }
28341 }
28342
28343 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
28344 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
28345 if (!NotExtCompare && N1C && N2C && N3C &&
28346 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
28347 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
28348 (N1C->isZero() && CC == ISD::SETLT)) &&
28349 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
28350 SDValue ASR = DAG.getNode(
28351 ISD::SRA, DL, CmpOpVT, N0,
28352 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
28353 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
28354 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
28355 }
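  // Worked instance of the ashr/xor fold above (i32): for
  //   select_cc setgt X, -1, C, ~C
  // the mask (sra X, 31) is 0 when X >= 0 and all-ones when X < 0, so
  //   xor (sra X, 31), C
  // produces C for non-negative X and ~C otherwise, matching the select.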
28356
28357 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28358 return S;
28359 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
28360 return S;
28361 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
28362 return ABD;
28363
28364 return SDValue();
28365}
28366
28367/// This is a stub for TargetLowering::SimplifySetCC.
28368SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
28369 ISD::CondCode Cond, const SDLoc &DL,
28370 bool foldBooleans) {
28371  TargetLowering::DAGCombinerInfo
28372    DagCombineInfo(DAG, Level, false, this);
28373 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
28374}
28375
28376/// Given an ISD::SDIV node expressing a divide by constant, return
28377/// a DAG expression to select that will generate the same value by multiplying
28378/// by a magic number.
28379/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
28380SDValue DAGCombiner::BuildSDIV(SDNode *N) {
28381 // when optimising for minimum size, we don't want to expand a div to a mul
28382 // and a shift.
28383  if (DAG.getMachineFunction().getFunction().hasMinSize())
28384    return SDValue();
28385
28386  SmallVector<SDNode *, 8> Built;
28387  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28388 for (SDNode *N : Built)
28389 AddToWorklist(N);
28390 return S;
28391 }
28392
28393 return SDValue();
28394}
28395
28396/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
28397/// DAG expression that will generate the same value by right shifting.
28398SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
28399 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28400 if (!C)
28401 return SDValue();
28402
28403 // Avoid division by zero.
28404 if (C->isZero())
28405 return SDValue();
28406
28407  SmallVector<SDNode *, 8> Built;
28408  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
28409 for (SDNode *N : Built)
28410 AddToWorklist(N);
28411 return S;
28412 }
28413
28414 return SDValue();
28415}
28416
28417/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
28418/// expression that will generate the same value by multiplying by a magic
28419/// number.
28420/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
28421SDValue DAGCombiner::BuildUDIV(SDNode *N) {
28422 // when optimising for minimum size, we don't want to expand a div to a mul
28423 // and a shift.
28424  if (DAG.getMachineFunction().getFunction().hasMinSize())
28425    return SDValue();
28426
28427  SmallVector<SDNode *, 8> Built;
28428  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
28429 for (SDNode *N : Built)
28430 AddToWorklist(N);
28431 return S;
28432 }
28433
28434 return SDValue();
28435}
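// Illustrative expansion (not what this helper emits verbatim; the actual
// sequence comes from TLI.BuildUDIV): an i32 unsigned divide by 3 can be
// rewritten as
//   q = mulhu(x, 0xAAAAAAAB) >> 1    ; 0xAAAAAAAB == ceil(2^33 / 3)
// which equals x / 3 for every 32-bit x, replacing the divide with a
// multiply-high and a shift.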
28436
28437/// Given an ISD::SREM node expressing a remainder by constant power of 2,
28438/// return a DAG expression that will generate the same value.
28439SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
28440 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
28441 if (!C)
28442 return SDValue();
28443
28444 // Avoid division by zero.
28445 if (C->isZero())
28446 return SDValue();
28447
28448  SmallVector<SDNode *, 8> Built;
28449  if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
28450 for (SDNode *N : Built)
28451 AddToWorklist(N);
28452 return S;
28453 }
28454
28455 return SDValue();
28456}
28457
28458// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
28459//
28460// Returns the node that represents `Log2(Op)`. This may create a new node. If
28461// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
28462//
28463// All nodes will be created at `DL` and the output will be of type `VT`.
28464//
28465// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
28466// `AssumeNonZero` if this function should simply assume (rather than prove)
28467// that `Op` is non-zero.
28468static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
28469                                   SDValue Op, unsigned Depth,
28470 bool AssumeNonZero) {
28471 assert(VT.isInteger() && "Only integer types are supported!");
28472
28473 auto PeekThroughCastsAndTrunc = [](SDValue V) {
28474 while (true) {
28475 switch (V.getOpcode()) {
28476 case ISD::TRUNCATE:
28477 case ISD::ZERO_EXTEND:
28478 V = V.getOperand(0);
28479 break;
28480 default:
28481 return V;
28482 }
28483 }
28484 };
28485
28486 if (VT.isScalableVector())
28487 return SDValue();
28488
28489 Op = PeekThroughCastsAndTrunc(Op);
28490
28491 // Helper for determining whether a value is a power-2 constant scalar or a
28492 // vector of such elements.
28493 SmallVector<APInt> Pow2Constants;
28494 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
28495 if (C->isZero() || C->isOpaque())
28496 return false;
28497 // TODO: We may also be able to support negative powers of 2 here.
28498 if (C->getAPIntValue().isPowerOf2()) {
28499 Pow2Constants.emplace_back(C->getAPIntValue());
28500 return true;
28501 }
28502 return false;
28503 };
28504
28505 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
28506 if (!VT.isVector())
28507 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
28508 // We need to create a build vector
28509 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
28510 return DAG.getSplat(VT, DL,
28511 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
28512 VT.getScalarType()));
28513 SmallVector<SDValue> Log2Ops;
28514 for (const APInt &Pow2 : Pow2Constants)
28515 Log2Ops.emplace_back(
28516 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
28517 return DAG.getBuildVector(VT, DL, Log2Ops);
28518 }
28519
28520 if (Depth >= DAG.MaxRecursionDepth)
28521 return SDValue();
28522
28523 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
28524 ToCast = PeekThroughCastsAndTrunc(ToCast);
28525 EVT CurVT = ToCast.getValueType();
28526 if (NewVT == CurVT)
28527 return ToCast;
28528
28529 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
28530 return DAG.getBitcast(NewVT, ToCast);
28531
28532 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
28533 };
28534
28535 // log2(X << Y) -> log2(X) + Y
28536 if (Op.getOpcode() == ISD::SHL) {
28537 // 1 << Y and X nuw/nsw << Y are all non-zero.
28538 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
28539 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
28540 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
28541 Depth + 1, AssumeNonZero))
28542 return DAG.getNode(ISD::ADD, DL, VT, LogX,
28543 CastToVT(VT, Op.getOperand(1)));
28544 }
28545
28546 // c ? X : Y -> c ? Log2(X) : Log2(Y)
28547 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
28548 Op.hasOneUse()) {
28549 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
28550 Depth + 1, AssumeNonZero))
28551 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
28552 Depth + 1, AssumeNonZero))
28553 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
28554 }
28555
28556 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
28557 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
28558 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
28559 Op.hasOneUse()) {
28560    // Use AssumeNonZero as false here. Otherwise we can hit a case where
28561    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
28562 if (SDValue LogX =
28563 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
28564 /*AssumeNonZero*/ false))
28565 if (SDValue LogY =
28566 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
28567 /*AssumeNonZero*/ false))
28568 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
28569 }
28570
28571 return SDValue();
28572}
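// A few cases the helper above handles cheaply (illustrative):
//   takeInexpensiveLog2(16)              -> constant 4
//   takeInexpensiveLog2(1 << y)          -> log2(1) + y, i.e. y
//   takeInexpensiveLog2(select c, 8, 32) -> select c, 3, 5
// Anything it cannot prove cheap (or non-zero, unless AssumeNonZero is set)
// yields an empty SDValue and callers fall back to other lowering.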
28573
28574/// Determines the LogBase2 value for a non-null input value using the
28575/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
28576SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
28577 bool KnownNonZero, bool InexpensiveOnly,
28578 std::optional<EVT> OutVT) {
28579 EVT VT = OutVT ? *OutVT : V.getValueType();
28580 SDValue InexpensiveLogBase2 =
28581 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28582 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
28583 return InexpensiveLogBase2;
28584
28585 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28586 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28587 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28588 return LogBase2;
28589}
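// Numeric check of the ctlz-based formula above for i32: with V == 16,
// ctlz(16) == 27 and (32 - 1) - 27 == 4 == log2(16). The expensive ctlz form
// is only emitted when DAG.isKnownToBeAPowerOfTwo(V) holds, so the result is
// the exact base-2 logarithm.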
28590
28591/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28592/// For the reciprocal, we need to find the zero of the function:
28593/// F(X) = 1/X - A [which has a zero at X = 1/A]
28594/// =>
28595/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28596/// does not require additional intermediate precision]
28597/// For the last iteration, put numerator N into it to gain more precision:
28598/// Result = N X_i + X_i (N - N A X_i)
28599SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28600 SDNodeFlags Flags) {
28601 if (LegalDAG)
28602 return SDValue();
28603
28604 // TODO: Handle extended types?
28605 EVT VT = Op.getValueType();
28606 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28607 VT.getScalarType() != MVT::f64)
28608 return SDValue();
28609
28610 // If estimates are explicitly disabled for this function, we're done.
28611  MachineFunction &MF = DAG.getMachineFunction();
28612  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28613 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28614 return SDValue();
28615
28616 // Estimates may be explicitly enabled for this type with a custom number of
28617 // refinement steps.
28618 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28619 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28620 AddToWorklist(Est.getNode());
28621
28622 SDLoc DL(Op);
28623 if (Iterations) {
28624 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28625
28626 // Newton iterations: Est = Est + Est (N - Arg * Est)
28627 // If this is the last iteration, also multiply by the numerator.
28628 for (int i = 0; i < Iterations; ++i) {
28629 SDValue MulEst = Est;
28630
28631 if (i == Iterations - 1) {
28632 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28633 AddToWorklist(MulEst.getNode());
28634 }
28635
28636 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28637 AddToWorklist(NewEst.getNode());
28638
28639 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28640 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28641 AddToWorklist(NewEst.getNode());
28642
28643 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28644 AddToWorklist(NewEst.getNode());
28645
28646 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28647 AddToWorklist(Est.getNode());
28648 }
28649 } else {
28650 // If no iterations are available, multiply with N.
28651 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28652 AddToWorklist(Est.getNode());
28653 }
28654
28655 return Est;
28656 }
28657
28658 return SDValue();
28659}
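// Numeric illustration of the Newton iteration above: approximating 1/3 from
// an initial estimate X0 = 0.3 gives
//   X1 = 0.3  * (2 - 3 * 0.3)  = 0.33
//   X2 = 0.33 * (2 - 3 * 0.33) = 0.3333
// so the error roughly squares on each step; getDivRefinementSteps decides
// how many such steps are emitted.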
28660
28661/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28662/// For the reciprocal sqrt, we need to find the zero of the function:
28663/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28664/// =>
28665/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28666/// As a result, we precompute A/2 prior to the iteration loop.
28667SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28668 unsigned Iterations,
28669 SDNodeFlags Flags, bool Reciprocal) {
28670 EVT VT = Arg.getValueType();
28671 SDLoc DL(Arg);
28672 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28673
28674 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28675 // this entire sequence requires only one FP constant.
28676 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28677 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28678
28679 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28680 for (unsigned i = 0; i < Iterations; ++i) {
28681 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28682 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28683 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28684 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28685 }
28686
28687 // If non-reciprocal square root is requested, multiply the result by Arg.
28688 if (!Reciprocal)
28689 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28690
28691 return Est;
28692}
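// Sanity check of the iteration above with A == 4 and the exact estimate
// Est == 0.5 (== 1/sqrt(4)): HalfArg = 1.5 * 4 - 4 = 2, and one step gives
//   Est = 0.5 * (1.5 - 2 * 0.5 * 0.5) = 0.5 * 1.0 = 0.5
// i.e. the exact reciprocal square root is a fixed point of the update
// X_{i+1} = X_i * (1.5 - A/2 * X_i^2), as expected.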
28693
28694/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28695/// For the reciprocal sqrt, we need to find the zero of the function:
28696/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28697/// =>
28698/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28699SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28700 unsigned Iterations,
28701 SDNodeFlags Flags, bool Reciprocal) {
28702 EVT VT = Arg.getValueType();
28703 SDLoc DL(Arg);
28704 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28705 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28706
28707 // This routine must enter the loop below to work correctly
28708 // when (Reciprocal == false).
28709 assert(Iterations > 0);
28710
28711 // Newton iterations for reciprocal square root:
28712 // E = (E * -0.5) * ((A * E) * E + -3.0)
28713 for (unsigned i = 0; i < Iterations; ++i) {
28714 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28715 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28716 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28717
28718 // When calculating a square root at the last iteration build:
28719 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28720 // (notice a common subexpression)
28721 SDValue LHS;
28722 if (Reciprocal || (i + 1) < Iterations) {
28723 // RSQRT: LHS = (E * -0.5)
28724 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28725 } else {
28726 // SQRT: LHS = (A * E) * -0.5
28727 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28728 }
28729
28730 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28731 }
28732
28733 return Est;
28734}
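// The same fixed-point check for this variant with A == 4, Est == 0.5:
//   AE = 4 * 0.5 = 2,  AEE = 2 * 0.5 = 1,  RHS = 1 + (-3) = -2
//   LHS = 0.5 * (-0.5) = -0.25 (reciprocal case), Est = (-0.25) * (-2) = 0.5
// and on the final non-reciprocal iteration LHS = (A * E) * -0.5 = -1,
// giving Est = (-1) * (-2) = 2 == sqrt(4) directly.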
28735
28736/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28737/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28738/// Op can be zero.
28739SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28740 bool Reciprocal) {
28741 if (LegalDAG)
28742 return SDValue();
28743
28744 // TODO: Handle extended types?
28745 EVT VT = Op.getValueType();
28746 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28747 VT.getScalarType() != MVT::f64)
28748 return SDValue();
28749
28750 // If estimates are explicitly disabled for this function, we're done.
28751  MachineFunction &MF = DAG.getMachineFunction();
28752  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28753 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28754 return SDValue();
28755
28756 // Estimates may be explicitly enabled for this type with a custom number of
28757 // refinement steps.
28758 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28759
28760 bool UseOneConstNR = false;
28761 if (SDValue Est =
28762 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28763 Reciprocal)) {
28764 AddToWorklist(Est.getNode());
28765
28766 if (Iterations > 0)
28767 Est = UseOneConstNR
28768 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28769 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28770 if (!Reciprocal) {
28771 SDLoc DL(Op);
28772 // Try the target specific test first.
28773 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28774
28775 // The estimate is now completely wrong if the input was exactly 0.0 or
28776 // possibly a denormal. Force the answer to 0.0 or value provided by
28777 // target for those cases.
28778 Est = DAG.getNode(
28779 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28780 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28781 }
28782 return Est;
28783 }
28784
28785 return SDValue();
28786}
28787
28788SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28789 return buildSqrtEstimateImpl(Op, Flags, true);
28790}
28791
28792SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28793 return buildSqrtEstimateImpl(Op, Flags, false);
28794}
28795
28796/// Return true if there is any possibility that the two addresses overlap.
28797bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28798
28799 struct MemUseCharacteristics {
28800 bool IsVolatile;
28801 bool IsAtomic;
28802    SDValue BasePtr;
28803    int64_t Offset;
28804 LocationSize NumBytes;
28805 MachineMemOperand *MMO;
28806 };
28807
28808 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28809 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28810 int64_t Offset = 0;
28811 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28812 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28813 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28814 ? -1 * C->getSExtValue()
28815 : 0;
28816 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28817 return {LSN->isVolatile(), LSN->isAtomic(),
28818 LSN->getBasePtr(), Offset /*base offset*/,
28819 LocationSize::precise(Size), LSN->getMemOperand()};
28820 }
28821 if (const auto *LN = cast<LifetimeSDNode>(N))
28822 return {false /*isVolatile*/,
28823 /*isAtomic*/ false,
28824 LN->getOperand(1),
28825 (LN->hasOffset()) ? LN->getOffset() : 0,
28826 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28827                              : LocationSize::beforeOrAfterPointer(),
28828              (MachineMemOperand *)nullptr};
28829 // Default.
28830 return {false /*isvolatile*/,
28831 /*isAtomic*/ false,
28832 SDValue(),
28833 (int64_t)0 /*offset*/,
28834            LocationSize::beforeOrAfterPointer() /*size*/,
28835            (MachineMemOperand *)nullptr};
28836 };
28837
28838 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28839 MUC1 = getCharacteristics(Op1);
28840
28841 // If they are to the same address, then they must be aliases.
28842 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28843 MUC0.Offset == MUC1.Offset)
28844 return true;
28845
28846 // If they are both volatile then they cannot be reordered.
28847 if (MUC0.IsVolatile && MUC1.IsVolatile)
28848 return true;
28849
28850 // Be conservative about atomics for the moment
28851 // TODO: This is way overconservative for unordered atomics (see D66309)
28852 if (MUC0.IsAtomic && MUC1.IsAtomic)
28853 return true;
28854
28855 if (MUC0.MMO && MUC1.MMO) {
28856 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28857 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28858 return false;
28859 }
28860
28861 // If NumBytes is scalable and offset is not 0, conservatively return may
28862 // alias
28863 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28864 MUC0.Offset != 0) ||
28865 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28866 MUC1.Offset != 0))
28867 return true;
28868 // Try to prove that there is aliasing, or that there is no aliasing. Either
28869 // way, we can return now. If nothing can be proved, proceed with more tests.
28870 bool IsAlias;
28871 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28872 DAG, IsAlias))
28873 return IsAlias;
28874
28875 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28876 // either are not known.
28877 if (!MUC0.MMO || !MUC1.MMO)
28878 return true;
28879
28880 // If one operation reads from invariant memory, and the other may store, they
28881 // cannot alias. These should really be checking the equivalent of mayWrite,
28882  // but it only matters for memory nodes other than load/store.
28883 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28884 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28885 return false;
28886
28887  // If we know that SrcValue1 and SrcValue2 have relatively large
28888 // alignment compared to the size and offset of the access, we may be able
28889 // to prove they do not alias. This check is conservative for now to catch
28890  // cases created by splitting vector types; it only works when the offsets are
28891 // multiples of the size of the data.
28892 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28893 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28894 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28895 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28896 LocationSize Size0 = MUC0.NumBytes;
28897 LocationSize Size1 = MUC1.NumBytes;
28898
28899 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28900 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28901 !Size1.isScalable() && Size0 == Size1 &&
28902 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28903 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28904 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28905 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28906 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28907
28908 // There is no overlap between these relatively aligned accesses of
28909 // similar size. Return no alias.
28910 if ((OffAlign0 + static_cast<int64_t>(
28911 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28912 (OffAlign1 + static_cast<int64_t>(
28913 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28914 return false;
28915 }
28916
28917  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28918                   ? CombinerGlobalAA
28919                   : DAG.getSubtarget().useAA();
28920#ifndef NDEBUG
28921 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28922      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28923    UseAA = false;
28924#endif
28925
28926 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28927 Size0.hasValue() && Size1.hasValue() &&
28928 // Can't represent a scalable size + fixed offset in LocationSize
28929 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28930 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28931 // Use alias analysis information.
28932 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28933 int64_t Overlap0 =
28934 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28935 int64_t Overlap1 =
28936 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28937 LocationSize Loc0 =
28938 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28939 LocationSize Loc1 =
28940 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28941 if (BatchAA->isNoAlias(
28942 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28943 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28944 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28945 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28946 return false;
28947 }
28948
28949 // Otherwise we have to assume they alias.
28950 return true;
28951}
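// Example of the alignment-based disambiguation above: two 4-byte accesses
// whose memory operands share an 8-byte-aligned base value at offsets 0 and 4
// satisfy OffAlign0 + 4 <= OffAlign1 (0 + 4 <= 4), so they are reported as
// non-aliasing without ever consulting alias analysis.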
28952
28953/// Walk up chain skipping non-aliasing memory nodes,
28954/// looking for aliasing nodes and adding them to the Aliases vector.
28955void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28956 SmallVectorImpl<SDValue> &Aliases) {
28957 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28958 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28959
28960 // Get alias information for node.
28961 // TODO: relax aliasing for unordered atomics (see D66309)
28962 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28963
28964 // Starting off.
28965 Chains.push_back(OriginalChain);
28966 unsigned Depth = 0;
28967
28968 // Attempt to improve chain by a single step
28969 auto ImproveChain = [&](SDValue &C) -> bool {
28970 switch (C.getOpcode()) {
28971 case ISD::EntryToken:
28972 // No need to mark EntryToken.
28973 C = SDValue();
28974 return true;
28975 case ISD::LOAD:
28976 case ISD::STORE: {
28977 // Get alias information for C.
28978 // TODO: Relax aliasing for unordered atomics (see D66309)
28979 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28980 cast<LSBaseSDNode>(C.getNode())->isSimple();
28981 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28982 // Look further up the chain.
28983 C = C.getOperand(0);
28984 return true;
28985 }
28986 // Alias, so stop here.
28987 return false;
28988 }
28989
28990 case ISD::CopyFromReg:
28991 // Always forward past CopyFromReg.
28992 C = C.getOperand(0);
28993 return true;
28994
28995    case ISD::LIFETIME_START:
28996    case ISD::LIFETIME_END: {
28997 // We can forward past any lifetime start/end that can be proven not to
28998 // alias the memory access.
28999 if (!mayAlias(N, C.getNode())) {
29000 // Look further up the chain.
29001 C = C.getOperand(0);
29002 return true;
29003 }
29004 return false;
29005 }
29006 default:
29007 return false;
29008 }
29009 };
29010
29011 // Look at each chain and determine if it is an alias. If so, add it to the
29012 // aliases list. If not, then continue up the chain looking for the next
29013 // candidate.
29014 while (!Chains.empty()) {
29015 SDValue Chain = Chains.pop_back_val();
29016
29017 // Don't bother if we've seen Chain before.
29018 if (!Visited.insert(Chain.getNode()).second)
29019 continue;
29020
29021 // For TokenFactor nodes, look at each operand and only continue up the
29022 // chain until we reach the depth limit.
29023 //
29024 // FIXME: The depth check could be made to return the last non-aliasing
29025 // chain we found before we hit a tokenfactor rather than the original
29026 // chain.
29027 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
29028 Aliases.clear();
29029 Aliases.push_back(OriginalChain);
29030 return;
29031 }
29032
29033 if (Chain.getOpcode() == ISD::TokenFactor) {
29034 // We have to check each of the operands of the token factor for "small"
29035 // token factors, so we queue them up. Adding the operands to the queue
29036 // (stack) in reverse order maintains the original order and increases the
29037      // likelihood that getNode will find a matching token factor (CSE).
29038 if (Chain.getNumOperands() > 16) {
29039 Aliases.push_back(Chain);
29040 continue;
29041 }
29042 for (unsigned n = Chain.getNumOperands(); n;)
29043 Chains.push_back(Chain.getOperand(--n));
29044 ++Depth;
29045 continue;
29046 }
29047 // Everything else
29048 if (ImproveChain(Chain)) {
29049 // Updated Chain Found, Consider new chain if one exists.
29050 if (Chain.getNode())
29051 Chains.push_back(Chain);
29052 ++Depth;
29053 continue;
29054 }
29055 // No Improved Chain Possible, treat as Alias.
29056 Aliases.push_back(Chain);
29057 }
29058}
29059
29060/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29061/// (aliasing node).
29062SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
29063 if (OptLevel == CodeGenOptLevel::None)
29064 return OldChain;
29065
29066 // Ops for replacing token factor.
29067  SmallVector<SDValue, 8> Aliases;
29068
29069 // Accumulate all the aliases to this node.
29070 GatherAllAliases(N, OldChain, Aliases);
29071
29072 // If no operands then chain to entry token.
29073 if (Aliases.empty())
29074 return DAG.getEntryNode();
29075
29076 // If a single operand then chain to it. We don't need to revisit it.
29077 if (Aliases.size() == 1)
29078 return Aliases[0];
29079
29080 // Construct a custom tailored token factor.
29081 return DAG.getTokenFactor(SDLoc(N), Aliases);
29082}
29083
29084// This function tries to collect a bunch of potentially interesting
29085// nodes to improve the chains of, all at once. This might seem
29086// redundant, as this function gets called when visiting every store
29087// node, so why not let the work be done on each store as it's visited?
29088//
29089// I believe this is mainly important because mergeConsecutiveStores
29090// is unable to deal with merging stores of different sizes, so unless
29091// we improve the chains of all the potential candidates up-front
29092// before running mergeConsecutiveStores, it might only see some of
29093// the nodes that will eventually be candidates, and then not be able
29094// to go from a partially-merged state to the desired final
29095// fully-merged state.
29096
29097bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
29098 SmallVector<StoreSDNode *, 8> ChainedStores;
29099 StoreSDNode *STChain = St;
29100 // Intervals records which offsets from BaseIndex have been covered. In
29101  // the common case, every store writes to the immediately preceding address
29102  // and is thus merged with the previous interval at insertion time.
29103
29104 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
29105                                 IntervalMapHalfOpenInfo<int64_t>>;
29106  IMap::Allocator A;
29107 IMap Intervals(A);
29108
29109 // This holds the base pointer, index, and the offset in bytes from the base
29110 // pointer.
29111  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29112
29113 // We must have a base and an offset.
29114 if (!BasePtr.getBase().getNode())
29115 return false;
29116
29117 // Do not handle stores to undef base pointers.
29118 if (BasePtr.getBase().isUndef())
29119 return false;
29120
29121 // Do not handle stores to opaque types
29122 if (St->getMemoryVT().isZeroSized())
29123 return false;
29124
29125 // BaseIndexOffset assumes that offsets are fixed-size, which
29126 // is not valid for scalable vectors where the offsets are
29127 // scaled by `vscale`, so bail out early.
29128 if (St->getMemoryVT().isScalableVT())
29129 return false;
29130
29131 // Add ST's interval.
29132 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
29133 std::monostate{});
29134
29135 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
29136 if (Chain->getMemoryVT().isScalableVector())
29137 return false;
29138
29139 // If the chain has more than one use, then we can't reorder the mem ops.
29140 if (!SDValue(Chain, 0)->hasOneUse())
29141 break;
29142 // TODO: Relax for unordered atomics (see D66309)
29143 if (!Chain->isSimple() || Chain->isIndexed())
29144 break;
29145
29146 // Find the base pointer and offset for this memory node.
29147 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
29148 // Check that the base pointer is the same as the original one.
29149 int64_t Offset;
29150 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
29151 break;
29152 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
29153 // Make sure we don't overlap with other intervals by checking the ones to
29154 // the left or right before inserting.
29155 auto I = Intervals.find(Offset);
29156 // If there's a next interval, we should end before it.
29157 if (I != Intervals.end() && I.start() < (Offset + Length))
29158 break;
29159 // If there's a previous interval, we should start after it.
29160 if (I != Intervals.begin() && (--I).stop() <= Offset)
29161 break;
29162 Intervals.insert(Offset, Offset + Length, std::monostate{});
29163
29164 ChainedStores.push_back(Chain);
29165 STChain = Chain;
29166 }
29167
29168 // If we didn't find a chained store, exit.
29169 if (ChainedStores.empty())
29170 return false;
29171
29172 // Improve all chained stores (St and ChainedStores members) starting from
29173 // where the store chain ended and return single TokenFactor.
29174 SDValue NewChain = STChain->getChain();
29175  SmallVector<SDValue, 8> TFOps;
29176  for (unsigned I = ChainedStores.size(); I;) {
29177 StoreSDNode *S = ChainedStores[--I];
29178 SDValue BetterChain = FindBetterChain(S, NewChain);
29179 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
29180 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
29181 TFOps.push_back(SDValue(S, 0));
29182 ChainedStores[I] = S;
29183 }
29184
29185 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
29186 SDValue BetterChain = FindBetterChain(St, NewChain);
29187 SDValue NewST;
29188 if (St->isTruncatingStore())
29189 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
29190 St->getBasePtr(), St->getMemoryVT(),
29191 St->getMemOperand());
29192 else
29193 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
29194 St->getBasePtr(), St->getMemOperand());
29195
29196 TFOps.push_back(NewST);
29197
29198 // If we improved every element of TFOps, then we've lost the dependence on
29199 // NewChain to successors of St and we need to add it back to TFOps. Do so at
29200 // the beginning to keep relative order consistent with FindBetterChains.
29201 auto hasImprovedChain = [&](SDValue ST) -> bool {
29202 return ST->getOperand(0) != NewChain;
29203 };
29204 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
29205 if (AddNewChain)
29206 TFOps.insert(TFOps.begin(), NewChain);
29207
29208 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
29209 CombineTo(St, TF);
29210
29211 // Add TF and its operands to the worklist.
29212 AddToWorklist(TF.getNode());
29213 for (const SDValue &Op : TF->ops())
29214 AddToWorklist(Op.getNode());
29215 AddToWorklist(STChain);
29216 return true;
29217}
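// Sketch of the effect of parallelizeChainedStores (illustrative): a serial
// chain of disjoint stores
//   st0: store ... ch
//   st1: store ... st0
//   st2: store ... st1
// is rewritten so that each store takes its chain from the improved input
// chain, and a single TokenFactor over the rewritten stores replaces the
// chain result of st2, letting the stores issue independently.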
29218
29219bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
29220 if (OptLevel == CodeGenOptLevel::None)
29221 return false;
29222
29223  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29224
29225 // We must have a base and an offset.
29226 if (!BasePtr.getBase().getNode())
29227 return false;
29228
29229 // Do not handle stores to undef base pointers.
29230 if (BasePtr.getBase().isUndef())
29231 return false;
29232
29233 // Directly improve a chain of disjoint stores starting at St.
29234 if (parallelizeChainedStores(St))
29235 return true;
29236
29237  // Improve St's chain.
29238 SDValue BetterChain = FindBetterChain(St, St->getChain());
29239 if (St->getChain() != BetterChain) {
29240 replaceStoreChain(St, BetterChain);
29241 return true;
29242 }
29243 return false;
29244}
29245
29246/// This is the entry point for the file.
29247void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
29248                           CodeGenOptLevel OptLevel) {
29249 /// This is the main entry point to this class.
29250 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
29251}
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:500
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isUndef(const MachineInstr &MI)
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1122
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1210
bool isNegative() const
Definition: APFloat.h:1445
bool isNormal() const
Definition: APFloat.h:1449
bool isDenormal() const
Definition: APFloat.h:1446
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1428
const fltSemantics & getSemantics() const
Definition: APFloat.h:1453
bool isNaN() const
Definition: APFloat.h:1443
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1090
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
bool isLargest() const
Definition: APFloat.h:1461
bool isIEEE() const
Definition: APFloat.h:1463
bool isInfinity() const
Definition: APFloat.h:1442
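A minimal, self-contained sketch (not taken from DAGCombiner itself) showing how the APFloat members listed above compose; the chosen float semantics and values are illustrative assumptions only.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;
void apfloatSketch() {
  APFloat One = APFloat::getOne(APFloat::IEEEsingle());  // getOne factory
  APFloat NaN = APFloat::getQNaN(APFloat::IEEEsingle()); // getQNaN factory
  (void)One.divide(NaN, APFloat::rmNearestTiesToEven);   // dividing by a NaN yields a NaN in place
  assert(One.isNaN() && !One.isInfinity());
  APInt Bits = One.bitcastToAPInt();                     // raw IEEE-754 bit pattern
  (void)Bits;
}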
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1945
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:617
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:466
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
int32_t exactLogBase2() const
Definition: APInt.h:1761
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1909
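A hypothetical helper (illustrative, not part of this file) built on the overflow-reporting arithmetic above:
#include "llvm/ADT/APInt.h"
using namespace llvm;
// Returns true when A + B wraps in an unsigned sense.
bool unsignedAddWraps(const APInt &A, const APInt &B) {
  bool Overflow = false;
  (void)A.uadd_ov(B, Overflow); // result discarded; only the flag is needed
  return Overflow;
}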
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1434
unsigned logBase2() const
Definition: APInt.h:1739
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1934
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
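Illustrative values for the mask-building factories above (a sketch, not code from this file); the 16-bit width is an arbitrary assumption.
#include "llvm/ADT/APInt.h"
using namespace llvm;
void apintMaskSketch() {
  APInt Low  = APInt::getLowBitsSet(16, 4);  // 0x000F
  APInt High = APInt::getHighBitsSet(16, 4); // 0xF000
  APInt Mid  = APInt::getBitsSet(16, 4, 8);  // bits [4,8) -> 0x00F0
  APInt Sign = APInt::getSignMask(16);       // 0x8000
  (void)Low; (void)High; (void)Mid; (void)Sign;
}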
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
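A hedged sketch of how the address decomposition above is typically used; it assumes two memory SDNodes St0 and St1 and a SelectionDAG DAG are in scope, and that the usual equalBaseIndex follow-up query is available (BaseIndexOffset is declared in llvm/CodeGen/SelectionDAGAddressAnalysis.h).
// Assumed context: SDNode *St0, *St1 (memory ops) and SelectionDAG &DAG.
BaseIndexOffset B0 = BaseIndexOffset::match(St0, DAG);
BaseIndexOffset B1 = BaseIndexOffset::match(St1, DAG);
int64_t ByteDist;
if (B0.equalBaseIndex(B1, DAG, ByteDist)) {
  // Same base and index expression; ByteDist is the constant byte offset
  // between the two accesses, usable for adjacency or aliasing decisions.
}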
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
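Sketch of the usual counter-gating pattern built from DEBUG_COUNTER and shouldExecute above; the counter and function names here are hypothetical, not defined in this file.
#include "llvm/Support/DebugCounter.h"
using namespace llvm;
DEBUG_COUNTER(MyXformCounter, "my-xform",
              "Controls whether the example transform runs");
bool shouldRunExampleTransform() {
  // Skips the transform when the counter says so (e.g. while bisecting).
  return DebugCounter::shouldExecute(MyXformCounter);
}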
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:365
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:422
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:415
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
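A small hypothetical helper (illustrative only, assuming the usual SelectionDAG headers) built from the SDValue accessors above, in the style of combiner peek-through code.
static SDValue peekThroughOneUseTruncate(SDValue V) {
  if (V.getOpcode() == ISD::TRUNCATE && V.hasOneUse())
    return V.getOperand(0); // look through a single-use truncate
  return V;
}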
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:371
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:983
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:577
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static unsigned getHasPredecessorMaxSteps()
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:891
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:569
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
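A hypothetical builder (not an actual fold from this file) combining getConstant and getNode from the list above; it assumes SelectionDAG, SDValue, SDLoc, EVT and ISD are available.
static SDValue buildAddOne(SelectionDAG &DAG, SDValue X, const SDLoc &DL) {
  EVT VT = X.getValueType();
  SDValue One = DAG.getConstant(1, DL, VT);     // constant of matching type
  return DAG.getNode(ISD::ADD, DL, VT, X, One); // gets or creates the ADD node
}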
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:504
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:874
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
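A hedged sketch (a hypothetical helper with illustrative names) showing how the known-bits queries above are typically combined:
static bool lowTwoBitsKnownZero(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.countMinTrailingZeros() >= 2)
    return true; // proven directly from the known-zero bits
  unsigned BW = Op.getScalarValueSizeInBits();
  APInt LowTwo = APInt::getLowBitsSet(BW, 2);
  return DAG.MaskedValueIsZero(Op, LowTwo); // equivalent mask-based query
}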
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:586
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
std::optional< bool > isBoolConstant(SDValue N, bool AllowTruncation=false) const
Check if a value N is a constant using the target's BooleanContent for its type.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:907
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:937
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
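A minimal worklist sketch (illustrative, using int elements rather than SDNodes) built on the SetVector operations listed above:
#include "llvm/ADT/SetVector.h"
using namespace llvm;
void drainWorklist(SmallSetVector<int, 8> &Worklist) {
  while (!Worklist.empty()) {
    int Item = Worklist.pop_back_val(); // removes and returns the last element
    if (Item > 0)
      Worklist.insert(Item - 1);        // set semantics: duplicates are ignored
  }
}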
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
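A worked example (values are illustrative) of commuteMask on a 4-element shuffle mask, where indices below 4 select from the first operand and 4..7 from the second:
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
void commuteMaskSketch() {
  SmallVector<int, 4> Mask = {0, 5, 2, 7};
  ShuffleVectorSDNode::commuteMask(Mask); // now {4, 1, 6, 3}: operands swapped
}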
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
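Together with isTypeLegal and isOperationLegal above, this hook is the usual legality gate before a combine creates a new node. A minimal sketch, assuming a DAGCombiner-style context where DAG, TLI, LegalOperations, DL, VT and X are already in scope (the fold itself is hypothetical):
// Only form an ISD::ABS node if the target can handle it, or if we are still
// running before operation legalization.
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ABS, VT))
  return DAG.getNode(ISD::ABS, DL, VT, X);
return SDValue();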
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
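A minimal sketch of how a combine typically calls this entry point, assuming N, Op and a DAGCombinerInfo DCI are in scope; the convenience overload taking DAGCombinerInfo commits any simplification for you. The choice of demanding only the low 8 bits is a made-up example:
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
// Pretend only the low 8 bits of Op are ever observed downstream.
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Demanded = APInt::getLowBitsSet(BitWidth, 8);
if (TLI.SimplifyDemandedBits(Op, Demanded, DCI))
  return SDValue(N, 0);   // the DAG was updated in place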
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2217
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2222
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2227
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2232
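For clarity, a small self-contained example of the signed vs. unsigned APInt comparisons listed above:
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

void apintMinMaxDemo() {
  APInt A(8, 0x80), B(8, 0x7F);        // A is -128 signed, but 128 unsigned
  assert(APIntOps::smin(A, B) == A);   // signed view:   -128 < 127
  assert(APIntOps::umin(A, B) == B);   // unsigned view:  127 < 128
}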
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ TRUNCATE_SSAT_U
Definition: ISDOpcodes.h:834
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1377
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1262
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1078
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Definition: ISDOpcodes.h:1481
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1083
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that are the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1576
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1686
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1661
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1681
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1498
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
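A short illustration of these condition-code helpers (VT stands for the comparison's value type, assumed here to be an integer type):
ISD::CondCode CC   = ISD::SETLT;
ISD::CondCode Inv  = ISD::getSetCCInverse(CC, VT);      // !(X < Y)  -> SETGE
ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);  // (Y op X)  -> SETGT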
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1572
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1572
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1643
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
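For example, a combine can use this hook to test a scalar, splat, or BUILD_VECTOR constant operand lane by lane. A sketch, with N the node being combined and the power-of-two predicate chosen purely for illustration:
// True only if every constant lane of operand 1 is a non-zero power of two.
bool AllLanesPow2 = ISD::matchUnaryPredicate(
    N->getOperand(1),
    [](ConstantSDNode *C) { return C && C->getAPIntValue().isPowerOf2(); });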
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1655
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:903
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or splat of an integer constant.
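These SDPatternMatch helpers compose into declarative node matchers. A minimal sketch under that assumption; the helper name is hypothetical:
using namespace llvm::SDPatternMatch;

// True if N is (srl X, C) for any X and any integer-constant (or splat) C.
static bool isSrlByConstant(SDNode *N) {
  return sd_match(N, m_Srl(m_Value(), m_ConstInt()));
}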
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4771
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:354
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1565
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:518
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:347
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:395
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1503
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
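A few of the bit-math helpers above, in a self-contained form:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

void bitMathDemo() {
  static_assert(llvm::isPowerOf2_32(64), "64 is a power of two");
  assert(llvm::Log2_32(64) == 6);        // floor log2
  assert(llvm::Log2_32_Ceil(65) == 7);   // ceil log2
  assert(llvm::countr_zero(0x50u) == 4); // trailing zero count of 0b1010000
}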
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
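These two helpers (declared in llvm/Analysis/VectorUtils.h) rescale shuffle masks between element widths; a small sketch:
SmallVector<int, 8> Narrow;
// Each wide index i becomes Scale narrow indices 2*i .. 2*i+1: {0, 3} -> {0, 1, 6, 7}.
narrowShuffleMaskElts(2, {0, 3}, Narrow);

SmallVector<int, 8> Wide;
// The inverse only succeeds when consecutive narrow indices pair up cleanly.
bool Widened = widenShuffleMaskElts(2, {0, 1, 6, 7}, Wide);  // Wide == {0, 3}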
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
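Typical use in a combine is to peek at a right-hand operand that may be a scalar constant or a splat. A sketch, with N the node being visited:
bool RHSIsAllOnes = false;
if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)))
  RHSIsAllOnes = C->getAPIntValue().isAllOnes();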
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2087
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:383
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:764
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:323
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:319
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:318
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:274
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:187
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:251
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:263
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
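A compact illustration of a few of the EVT queries above, assuming an LLVMContext &Ctx is available:
EVT I32   = EVT::getIntegerVT(Ctx, 32);
EVT V4I32 = EVT::getVectorVT(Ctx, I32, 4);
assert(V4I32.isVector() && V4I32.getVectorNumElements() == 4);
assert(V4I32.getScalarType() == I32);
assert(V4I32.getFixedSizeInBits() == 128 && V4I32.isInteger());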
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:53
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:82
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:59
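In DAG combines these KnownBits queries usually come from SelectionDAG::computeKnownBits. A sketch assuming DAG, DL and Op are in scope:
KnownBits Known = DAG.computeKnownBits(Op);
if (Known.isConstant())
  // Every bit of Op is known: replace it with a plain constant of the same type.
  return DAG.getConstant(Known.getConstant(), DL, Op.getValueType());
unsigned MinLeadingZeros = Known.countMinLeadingZeros();
(void)MinLeadingZeros;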
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
void setDisjoint(bool b)
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:317
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...