//===-- TargetLowering.cpp - Implement the TargetLowering class ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
                                          const uint32_t *CallerPreservedMask,
                                          const SmallVectorImpl<CCValAssign> &ArgLocs,
                                          const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

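// In other words: a register argument matches only when OutVals[I] is a
// CopyFromReg of the virtual register holding the caller's own live-in value
// for that same callee-saved register, i.e. the caller forwards its incoming
// argument unchanged.
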
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  const char *LibcallName = getLibcallName(LC);
  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
    reportFatalInternalError("unsupported library call operation");

  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
                    std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

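// Example usage (an illustrative sketch; the surrounding names are
// assumptions): a target expanding a 128-bit multiply into a runtime call
// might write
//   SDValue Ops[2] = {LHS, RHS};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       makeLibCall(DAG, RTLIB::MUL_I128, MVT::i128, Ops, CallOptions, dl);
// where Res.first is the result value and Res.second is the output chain.
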
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

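// Worked example (illustrative): a 15-byte memcpy with a fixed 4-byte
// destination alignment on a target whose widest safe memop type is i32
// yields MemOps = {i32, i32, i32, i16, i8}; if Op.allowOverlap() holds and
// fast unaligned accesses are available, the tail instead becomes a single
// i32 store that overlaps the previous one.
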
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {OldLHS.getValueType(), OldRHS.getValueType()};
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    reportFatalUsageError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      reportFatalUsageError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

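// Example (illustrative): softening SETUEQ on f32 produces two libcalls,
// RTLIB::UO_F32 and RTLIB::OEQ_F32 (__unordsf2 and __eqsf2 on most targets);
// each result is tested against zero and the two tests are OR'd together,
// since "unordered or equal" holds when either call reports true.
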
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

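// Example (illustrative): for (and X, 0xFF00) where only bits 0xF000 are
// demanded, 0xFF00 is not a subset of 0xF000, so the constant is shrunk to
// DemandedBits & C = 0xF000; the cleared low mask bits cannot affect any
// demanded result bit.
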
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

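// Example (illustrative): an i64 OR whose users only demand the low 8 bits
// can be rewritten as (any_extend (or (trunc X to i8), (trunc Y to i8))) on a
// target where the i64->i8 truncate and the i8->i64 zero-extend are free; the
// search starts at bit_ceil(8) = 8 bits and widens by powers of two until a
// free pair of casts is found.
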
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<uint64_t> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

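// Example (illustrative): for (and X, 0xFF) where only bits 0x0F are demanded,
// every demanded bit of the mask is known one, so the routine returns X
// directly without creating any new nodes; callers substitute it only for the
// single use being simplified.
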
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

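// Example (illustrative): with A and B zero-extended from i8 to i32, both
// operands have at least 24 known leading zero bits, so
//   (srl (add A, B), 1)          -> (zext (avgflooru (trunc A), (trunc B)))
//   (srl (add (add A, B), 1), 1) -> (zext (avgceilu (trunc A), (trunc B)))
// provided AVGFLOORU/AVGCEILU pass the legality checks above.
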
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
1612 case ISD::XOR: {
1613 SDValue Op0 = Op.getOperand(0);
1614 SDValue Op1 = Op.getOperand(1);
1615
1616 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1617 Depth + 1))
1618 return true;
1619 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1620 Depth + 1))
1621 return true;
1622
1623 // If all of the demanded bits are known zero on one side, return the other.
1624 // These bits cannot contribute to the result of the 'xor'.
1625 if (DemandedBits.isSubsetOf(Known.Zero))
1626 return TLO.CombineTo(Op, Op0);
1627 if (DemandedBits.isSubsetOf(Known2.Zero))
1628 return TLO.CombineTo(Op, Op1);
1629 // If the operation can be done in a smaller type, do so.
1630 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1631 return true;
1632
1633 // If all of the unknown bits are known to be zero on one side or the other
1634 // turn this into an *inclusive* or.
1635 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1636 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1637 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1638
1639 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1640 if (C) {
1641 // If one side is a constant, and all of the set bits in the constant are
1642 // also known set on the other side, turn this into an AND, as we know
1643 // the bits will be cleared.
1644 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1645 // NB: it is okay if more bits are known than are requested
1646 if (C->getAPIntValue() == Known2.One) {
1647 SDValue ANDC =
1648 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1649 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1650 }
1651
1652 // If the RHS is a constant, see if we can change it. Don't alter a -1
1653 // constant because that's a 'not' op, and that is better for combining
1654 // and codegen.
1655 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1656 // We're flipping all demanded bits. Flip the undemanded bits too.
1657 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1658 return TLO.CombineTo(Op, New);
1659 }
1660
1661 unsigned Op0Opcode = Op0.getOpcode();
1662 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1663 if (ConstantSDNode *ShiftC =
1664 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1665 // Don't crash on an oversized shift. We cannot guarantee that a
1666 // bogus shift has been simplified to undef.
1667 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1668 uint64_t ShiftAmt = ShiftC->getZExtValue();
1669 APInt Ones = APInt::getAllOnes(BitWidth);
1670 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1671 : Ones.lshr(ShiftAmt);
1672 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1673 isDesirableToCommuteXorWithShift(Op.getNode())) {
1674 // If the xor constant is a demanded mask, do a 'not' before the
1675 // shift:
1676 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1677 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
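// e.g. with BitWidth == 8, ShiftC == 4, XorC == 0xF0:
// (xor (shl X, 4), 0xF0) flips exactly the four bits that hold ~X[3:0],
// which is the same value as (shl (not X), 4).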
1678 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1679 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1680 Op0.getOperand(1)));
1681 }
1682 }
1683 }
1684 }
1685 }
1686
1687 // If we can't turn this into a 'not', try to shrink the constant.
1688 if (!C || !C->isAllOnes())
1689 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1690 return true;
1691
1692 // Attempt to avoid multi-use ops if we don't need anything from them.
1693 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1694 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1695 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1696 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1697 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1698 if (DemandedOp0 || DemandedOp1) {
1699 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1700 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1701 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1702 return TLO.CombineTo(Op, NewOp);
1703 }
1704 }
1705
1706 Known ^= Known2;
1707 break;
1708 }
1709 case ISD::SELECT:
1710 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1711 Known, TLO, Depth + 1))
1712 return true;
1713 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1714 Known2, TLO, Depth + 1))
1715 return true;
1716
1717 // If the operands are constants, see if we can simplify them.
1718 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1719 return true;
1720
1721 // Only known if known in both the LHS and RHS.
1722 Known = Known.intersectWith(Known2);
1723 break;
1724 case ISD::VSELECT:
1725 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1726 Known, TLO, Depth + 1))
1727 return true;
1728 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1729 Known2, TLO, Depth + 1))
1730 return true;
1731
1732 // Only known if known in both the LHS and RHS.
1733 Known = Known.intersectWith(Known2);
1734 break;
1735 case ISD::SELECT_CC:
1736 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1737 Known, TLO, Depth + 1))
1738 return true;
1739 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1740 Known2, TLO, Depth + 1))
1741 return true;
1742
1743 // If the operands are constants, see if we can simplify them.
1744 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1745 return true;
1746
1747 // Only known if known in both the LHS and RHS.
1748 Known = Known.intersectWith(Known2);
1749 break;
1750 case ISD::SETCC: {
1751 SDValue Op0 = Op.getOperand(0);
1752 SDValue Op1 = Op.getOperand(1);
1753 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1754 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1755 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1756 // -1, we may be able to bypass the setcc.
1757 if (DemandedBits.isSignMask() &&
1758 Op0.getScalarValueSizeInBits() == BitWidth &&
1759 getBooleanContents(Op0.getValueType()) ==
1760 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1761 // If we're testing X < 0, then this compare isn't needed - just use X!
1762 // FIXME: We're limiting to integer types here, but this should also work
1763 // if we don't care about FP signed-zero. The use of SETLT with FP means
1764 // that we don't care about NaNs.
1765 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1766 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1767 return TLO.CombineTo(Op, Op0);
1768
1769 // TODO: Should we check for other forms of sign-bit comparisons?
1770 // Examples: X <= -1, X >= 0
1771 }
1772 if (getBooleanContents(Op0.getValueType()) ==
1773 TargetLowering::ZeroOrOneBooleanContent &&
1774 BitWidth > 1)
1775 Known.Zero.setBitsFrom(1);
1776 break;
1777 }
1778 case ISD::SHL: {
1779 SDValue Op0 = Op.getOperand(0);
1780 SDValue Op1 = Op.getOperand(1);
1781 EVT ShiftVT = Op1.getValueType();
1782
1783 if (std::optional<uint64_t> KnownSA =
1784 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1785 unsigned ShAmt = *KnownSA;
1786 if (ShAmt == 0)
1787 return TLO.CombineTo(Op, Op0);
1788
1789 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1790 // single shift. We can do this if the bottom bits (which are shifted
1791 // out) are never demanded.
1792 // TODO - support non-uniform vector amounts.
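// e.g. on i32, ((X >>u 3) << 5) with the low 5 bits undemanded folds to
// (shl X, 2), since Diff = ShAmt - C1 = 2 selects a single left shift.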
1793 if (Op0.getOpcode() == ISD::SRL) {
1794 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1795 if (std::optional<uint64_t> InnerSA =
1796 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1797 unsigned C1 = *InnerSA;
1798 unsigned Opc = ISD::SHL;
1799 int Diff = ShAmt - C1;
1800 if (Diff < 0) {
1801 Diff = -Diff;
1802 Opc = ISD::SRL;
1803 }
1804 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1805 return TLO.CombineTo(
1806 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1807 }
1808 }
1809 }
1810
1811 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1812 // are not demanded. This will likely allow the anyext to be folded away.
1813 // TODO - support non-uniform vector amounts.
1814 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1815 SDValue InnerOp = Op0.getOperand(0);
1816 EVT InnerVT = InnerOp.getValueType();
1817 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1818 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1819 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1820 SDValue NarrowShl = TLO.DAG.getNode(
1821 ISD::SHL, dl, InnerVT, InnerOp,
1822 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1823 return TLO.CombineTo(
1824 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1825 }
1826
1827 // Repeat the SHL optimization above in cases where an extension
1828 // intervenes: (shl (anyext (shr x, c1)), c2) to
1829 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1830 // aren't demanded (as above) and that the shifted upper c1 bits of
1831 // x aren't demanded.
1832 // TODO - support non-uniform vector amounts.
1833 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1834 InnerOp.hasOneUse()) {
1835 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1836 InnerOp, DemandedElts, Depth + 2)) {
1837 unsigned InnerShAmt = *SA2;
1838 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1839 DemandedBits.getActiveBits() <=
1840 (InnerBits - InnerShAmt + ShAmt) &&
1841 DemandedBits.countr_zero() >= ShAmt) {
1842 SDValue NewSA =
1843 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1844 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1845 InnerOp.getOperand(0));
1846 return TLO.CombineTo(
1847 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1848 }
1849 }
1850 }
1851 }
1852
1853 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1854 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1855 Depth + 1)) {
1856 // Disable the nsw and nuw flags. We can no longer guarantee that we
1857 // won't wrap after simplification.
1858 Op->dropFlags(SDNodeFlags::NoWrap);
1859 return true;
1860 }
1861 Known <<= ShAmt;
1862 // Low bits known zero.
1863 Known.Zero.setLowBits(ShAmt);
1864
1865 // Attempt to avoid multi-use ops if we don't need anything from them.
1866 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1867 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1868 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1869 if (DemandedOp0) {
1870 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1871 return TLO.CombineTo(Op, NewOp);
1872 }
1873 }
1874
1875 // TODO: Can we merge this fold with the one below?
1876 // Try shrinking the operation as long as the shift amount will still be
1877 // in range.
1878 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1879 Op.getNode()->hasOneUse()) {
1880 // Search for the smallest integer type with free casts to and from
1881 // Op's type. For expedience, just check power-of-2 integer types.
1882 unsigned DemandedSize = DemandedBits.getActiveBits();
1883 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1884 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1885 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1886 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1887 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1888 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1889 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1890 assert(DemandedSize <= SmallVTBits &&
1891 "Narrowed below demanded bits?");
1892 // We found a type with free casts.
1893 SDValue NarrowShl = TLO.DAG.getNode(
1894 ISD::SHL, dl, SmallVT,
1895 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1896 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1897 return TLO.CombineTo(
1898 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1899 }
1900 }
1901 }
1902
1903 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1904 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1905 // Only do this if we demand the upper half so the knownbits are correct.
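// e.g. (shl i64:x, 8) where x has at least 40 leading zero bits: the
// result's top half is zero, so the narrow i32 shift is NUW and
// (zero_extend (shl (trunc x), 8)) matches all 64 result bits.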
1906 unsigned HalfWidth = BitWidth / 2;
1907 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1908 DemandedBits.countLeadingOnes() >= HalfWidth) {
1909 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1910 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1911 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1912 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1913 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1914 // If we're demanding the upper bits at all, we must ensure
1915 // that the upper bits of the shift result are known to be zero,
1916 // which is equivalent to the narrow shift being NUW.
1917 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1918 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1919 SDNodeFlags Flags;
1920 Flags.setNoSignedWrap(IsNSW);
1921 Flags.setNoUnsignedWrap(IsNUW);
1922 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1923 SDValue NewShiftAmt =
1924 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1925 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1926 NewShiftAmt, Flags);
1927 SDValue NewExt =
1928 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1929 return TLO.CombineTo(Op, NewExt);
1930 }
1931 }
1932 }
1933 } else {
1934 // This is a variable shift, so we can't shift the demand mask by a known
1935 // amount. But if we are not demanding high bits, then we are not
1936 // demanding those bits from the pre-shifted operand either.
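// e.g. shifting an i32 left by an unknown amount while demanding only
// the low 24 bits: bits 24-31 of the source can never land in a demanded
// position, so they need not be demanded from Op0.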
1937 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1938 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1939 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1940 Depth + 1)) {
1941 // Disable the nsw and nuw flags. We can no longer guarantee that we
1942 // won't wrap after simplification.
1943 Op->dropFlags(SDNodeFlags::NoWrap);
1944 return true;
1945 }
1946 Known.resetAll();
1947 }
1948 }
1949
1950 // If we are only demanding sign bits then we can use the shift source
1951 // directly.
1952 if (std::optional<uint64_t> MaxSA =
1953 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1954 unsigned ShAmt = *MaxSA;
1955 unsigned NumSignBits =
1956 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1957 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1958 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1959 return TLO.CombineTo(Op, Op0);
1960 }
1961 break;
1962 }
1963 case ISD::SRL: {
1964 SDValue Op0 = Op.getOperand(0);
1965 SDValue Op1 = Op.getOperand(1);
1966 EVT ShiftVT = Op1.getValueType();
1967
1968 if (std::optional<uint64_t> KnownSA =
1969 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1970 unsigned ShAmt = *KnownSA;
1971 if (ShAmt == 0)
1972 return TLO.CombineTo(Op, Op0);
1973
1974 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1975 // single shift. We can do this if the top bits (which are shifted out)
1976 // are never demanded.
1977 // TODO - support non-uniform vector amounts.
1978 if (Op0.getOpcode() == ISD::SHL) {
1979 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980 if (std::optional<uint64_t> InnerSA =
1981 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1982 unsigned C1 = *InnerSA;
1983 unsigned Opc = ISD::SRL;
1984 int Diff = ShAmt - C1;
1985 if (Diff < 0) {
1986 Diff = -Diff;
1987 Opc = ISD::SHL;
1988 }
1989 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1990 return TLO.CombineTo(
1991 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1992 }
1993 }
1994 }
1995
1996 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1997 // single sra. We can do this if the top bits are never demanded.
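// e.g. (srl (sra X, 3), 2) with the top 2 bits undemanded becomes
// (sra X, 5); the combined amount is clamped to BitWidth - 1 so the new
// shift stays defined.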
1998 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1999 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2000 if (std::optional<uint64_t> InnerSA =
2001 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2002 unsigned C1 = *InnerSA;
2003 // Clamp the combined shift amount if it exceeds the bit width.
2004 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2005 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2006 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2007 Op0.getOperand(0), NewSA));
2008 }
2009 }
2010 }
2011
2012 APInt InDemandedMask = (DemandedBits << ShAmt);
2013
2014 // If the shift is exact, then it does demand the low bits (and knows that
2015 // they are zero).
2016 if (Op->getFlags().hasExact())
2017 InDemandedMask.setLowBits(ShAmt);
2018
2019 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2020 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2021 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2022 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2023 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2024 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2025 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2026 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2027 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2028 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2029 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2030 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2031 SDValue NewShiftAmt =
2032 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2033 SDValue NewShift =
2034 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2035 return TLO.CombineTo(
2036 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2037 }
2038 }
2039
2040 // Compute the new bits that are at the top now.
2041 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2042 Depth + 1))
2043 return true;
2044 Known >>= ShAmt;
2045 // High bits known zero.
2046 Known.Zero.setHighBits(ShAmt);
2047
2048 // Attempt to avoid multi-use ops if we don't need anything from them.
2049 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2050 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2051 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2052 if (DemandedOp0) {
2053 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2054 return TLO.CombineTo(Op, NewOp);
2055 }
2056 }
2057 } else {
2058 // Use generic knownbits computation as it has support for non-uniform
2059 // shift amounts.
2060 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2061 }
2062
2063 // If we are only demanding sign bits then we can use the shift source
2064 // directly.
2065 if (std::optional<uint64_t> MaxSA =
2066 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2067 unsigned ShAmt = *MaxSA;
2068 // Must already be signbits in DemandedBits bounds, and can't demand any
2069 // shifted in zeroes.
2070 if (DemandedBits.countl_zero() >= ShAmt) {
2071 unsigned NumSignBits =
2072 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2073 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2074 return TLO.CombineTo(Op, Op0);
2075 }
2076 }
2077
2078 // Try to match AVG patterns (after shift simplification).
2079 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2080 DemandedElts, Depth + 1))
2081 return TLO.CombineTo(Op, AVG);
2082
2083 break;
2084 }
2085 case ISD::SRA: {
2086 SDValue Op0 = Op.getOperand(0);
2087 SDValue Op1 = Op.getOperand(1);
2088 EVT ShiftVT = Op1.getValueType();
2089
2090 // If we only want bits that already match the signbit then we don't need
2091 // to shift.
2092 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2093 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2094 NumHiDemandedBits)
2095 return TLO.CombineTo(Op, Op0);
2096
2097 // If this is an arithmetic shift right and only the low-bit is set, we can
2098 // always convert this into a logical shr, even if the shift amount is
2099 // variable. The low bit of the shift cannot be an input sign bit unless
2100 // the shift amount is >= the size of the datatype, which is undefined.
2101 if (DemandedBits.isOne())
2102 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2103
2104 if (std::optional<uint64_t> KnownSA =
2105 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2106 unsigned ShAmt = *KnownSA;
2107 if (ShAmt == 0)
2108 return TLO.CombineTo(Op, Op0);
2109
2110 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2111 // supports sext_inreg.
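// e.g. on i32, (sra (shl X, 24), 24) is sign_extend_inreg from i8: the
// low 8 bits of X are kept and the top 24 become copies of bit 7.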
2112 if (Op0.getOpcode() == ISD::SHL) {
2113 if (std::optional<uint64_t> InnerSA =
2114 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2115 unsigned LowBits = BitWidth - ShAmt;
2116 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2117 if (VT.isVector())
2118 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2119 VT.getVectorElementCount());
2120
2121 if (*InnerSA == ShAmt) {
2122 if (!TLO.LegalOperations() ||
2123 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2124 return TLO.CombineTo(
2125 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2126 Op0.getOperand(0),
2127 TLO.DAG.getValueType(ExtVT)));
2128
2129 // Even if we can't convert to sext_inreg, we might be able to
2130 // remove this shift pair if the input is already sign extended.
2131 unsigned NumSignBits =
2132 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2133 if (NumSignBits > ShAmt)
2134 return TLO.CombineTo(Op, Op0.getOperand(0));
2135 }
2136 }
2137 }
2138
2139 APInt InDemandedMask = (DemandedBits << ShAmt);
2140
2141 // If the shift is exact, then it does demand the low bits (and knows that
2142 // they are zero).
2143 if (Op->getFlags().hasExact())
2144 InDemandedMask.setLowBits(ShAmt);
2145
2146 // If any of the demanded bits are produced by the sign extension, we also
2147 // demand the input sign bit.
2148 if (DemandedBits.countl_zero() < ShAmt)
2149 InDemandedMask.setSignBit();
2150
2151 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2152 Depth + 1))
2153 return true;
2154 Known >>= ShAmt;
2155
2156 // If the input sign bit is known to be zero, or if none of the top bits
2157 // are demanded, turn this into an unsigned shift right.
2158 if (Known.Zero[BitWidth - ShAmt - 1] ||
2159 DemandedBits.countl_zero() >= ShAmt) {
2160 SDNodeFlags Flags;
2161 Flags.setExact(Op->getFlags().hasExact());
2162 return TLO.CombineTo(
2163 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2164 }
2165
2166 int Log2 = DemandedBits.exactLogBase2();
2167 if (Log2 >= 0) {
2168 // The bit must come from the sign.
2169 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2170 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2171 }
2172
2173 if (Known.One[BitWidth - ShAmt - 1])
2174 // New bits are known one.
2175 Known.One.setHighBits(ShAmt);
2176
2177 // Attempt to avoid multi-use ops if we don't need anything from them.
2178 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2179 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2180 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2181 if (DemandedOp0) {
2182 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2183 return TLO.CombineTo(Op, NewOp);
2184 }
2185 }
2186 }
2187
2188 // Try to match AVG patterns (after shift simplification).
2189 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2190 DemandedElts, Depth + 1))
2191 return TLO.CombineTo(Op, AVG);
2192
2193 break;
2194 }
2195 case ISD::FSHL:
2196 case ISD::FSHR: {
2197 SDValue Op0 = Op.getOperand(0);
2198 SDValue Op1 = Op.getOperand(1);
2199 SDValue Op2 = Op.getOperand(2);
2200 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2201
2202 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2203 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2204
2205 // For fshl, 0-shift returns the 1st arg.
2206 // For fshr, 0-shift returns the 2nd arg.
2207 if (Amt == 0) {
2208 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2209 Known, TLO, Depth + 1))
2210 return true;
2211 break;
2212 }
2213
2214 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2215 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
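// e.g. fshl i8 X, Y, 3 computes (X << 3) | (Y >> 5), so a demanded
// result bit maps to (DemandedBits >> 3) in X and (DemandedBits << 5) in Y.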
2216 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2217 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2218 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2219 Depth + 1))
2220 return true;
2221 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2222 Depth + 1))
2223 return true;
2224
2225 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2226 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2227 Known = Known.unionWith(Known2);
2228
2229 // Attempt to avoid multi-use ops if we don't need anything from them.
2230 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2231 !DemandedElts.isAllOnes()) {
2232 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2233 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2234 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2235 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2236 if (DemandedOp0 || DemandedOp1) {
2237 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2238 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2239 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2240 DemandedOp1, Op2);
2241 return TLO.CombineTo(Op, NewOp);
2242 }
2243 }
2244 }
2245
2246 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2247 if (isPowerOf2_32(BitWidth)) {
2248 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2249 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2250 Known2, TLO, Depth + 1))
2251 return true;
2252 }
2253 break;
2254 }
2255 case ISD::ROTL:
2256 case ISD::ROTR: {
2257 SDValue Op0 = Op.getOperand(0);
2258 SDValue Op1 = Op.getOperand(1);
2259 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2260
2261 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2262 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2263 return TLO.CombineTo(Op, Op0);
2264
2265 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2266 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2267 unsigned RevAmt = BitWidth - Amt;
2268
2269 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2270 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
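// e.g. (rotl i8 X, 3) with only the low 3 bits demanded: the SHL half
// contributes nothing there, so the rotate becomes (srl X, 5) below.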
2271 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2272 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2273 Depth + 1))
2274 return true;
2275
2276 // rot*(x, 0) --> x
2277 if (Amt == 0)
2278 return TLO.CombineTo(Op, Op0);
2279
2280 // See if we don't demand either half of the rotated bits.
2281 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2282 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2283 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2284 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2285 }
2286 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2287 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2288 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2289 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2290 }
2291 }
2292
2293 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2294 if (isPowerOf2_32(BitWidth)) {
2295 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2296 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2297 Depth + 1))
2298 return true;
2299 }
2300 break;
2301 }
2302 case ISD::SMIN:
2303 case ISD::SMAX:
2304 case ISD::UMIN:
2305 case ISD::UMAX: {
2306 unsigned Opc = Op.getOpcode();
2307 SDValue Op0 = Op.getOperand(0);
2308 SDValue Op1 = Op.getOperand(1);
2309
2310 // If we're only demanding signbits, then we can simplify to OR/AND node.
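// For values that are all sign bits (0 or -1 per element): smin == or,
// umax == or, smax == and, umin == and; e.g. smin(0, -1) == -1 == 0 | -1.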
2311 unsigned BitOp =
2312 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2313 unsigned NumSignBits =
2314 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2315 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2316 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2317 if (NumSignBits >= NumDemandedUpperBits)
2318 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2319
2320 // Check if one arg is always less/greater than (or equal) to the other arg.
2321 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2322 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2323 switch (Opc) {
2324 case ISD::SMIN:
2325 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2326 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2327 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2328 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2329 Known = KnownBits::smin(Known0, Known1);
2330 break;
2331 case ISD::SMAX:
2332 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2333 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2334 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2335 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2336 Known = KnownBits::smax(Known0, Known1);
2337 break;
2338 case ISD::UMIN:
2339 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2340 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2341 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2342 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2343 Known = KnownBits::umin(Known0, Known1);
2344 break;
2345 case ISD::UMAX:
2346 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2347 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2348 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2349 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2350 Known = KnownBits::umax(Known0, Known1);
2351 break;
2352 }
2353 break;
2354 }
2355 case ISD::BITREVERSE: {
2356 SDValue Src = Op.getOperand(0);
2357 APInt DemandedSrcBits = DemandedBits.reverseBits();
2358 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2359 Depth + 1))
2360 return true;
2361 Known = Known2.reverseBits();
2362 break;
2363 }
2364 case ISD::BSWAP: {
2365 SDValue Src = Op.getOperand(0);
2366
2367 // If the only bits demanded come from one byte of the bswap result,
2368 // just shift the input byte into position to eliminate the bswap.
2369 unsigned NLZ = DemandedBits.countl_zero();
2370 unsigned NTZ = DemandedBits.countr_zero();
2371
2372 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2373 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2374 // have 14 leading zeros, round to 8.
2375 NLZ = alignDown(NLZ, 8);
2376 NTZ = alignDown(NTZ, 8);
2377 // If we need exactly one byte, we can do this transformation.
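// e.g. i32 bswap demanding only 0x0000FF00: NLZ = 16, NTZ = 8, and the
// wanted byte is input bits [23:16], so (srl Src, 8) replaces the bswap.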
2378 if (BitWidth - NLZ - NTZ == 8) {
2379 // Replace this with either a left or right shift to get the byte into
2380 // the right place.
2381 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2382 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2383 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2384 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2385 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2386 return TLO.CombineTo(Op, NewOp);
2387 }
2388 }
2389
2390 APInt DemandedSrcBits = DemandedBits.byteSwap();
2391 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2392 Depth + 1))
2393 return true;
2394 Known = Known2.byteSwap();
2395 break;
2396 }
2397 case ISD::CTPOP: {
2398 // If only 1 bit is demanded, replace with PARITY as long as we're before
2399 // op legalization.
2400 // FIXME: Limit to scalars for now.
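// PARITY computes the XOR of all bits, i.e. bit 0 of the population
// count, so it is an exact replacement when only the low bit is demanded.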
2401 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2402 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2403 Op.getOperand(0)));
2404
2405 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2406 break;
2407 }
2408 case ISD::SIGN_EXTEND_INREG: {
2409 SDValue Op0 = Op.getOperand(0);
2410 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2411 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2412
2413 // If we only care about the highest bit, don't bother shifting right.
2414 if (DemandedBits.isSignMask()) {
2415 unsigned MinSignedBits =
2416 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2417 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2418 // However if the input is already sign extended we expect the sign
2419 // extension to be dropped altogether later and do not simplify.
2420 if (!AlreadySignExtended) {
2421 // Compute the correct shift amount type, which must be getShiftAmountTy
2422 // for scalar types after legalization.
2423 SDValue ShiftAmt =
2424 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2425 return TLO.CombineTo(Op,
2426 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2427 }
2428 }
2429
2430 // If none of the extended bits are demanded, eliminate the sextinreg.
2431 if (DemandedBits.getActiveBits() <= ExVTBits)
2432 return TLO.CombineTo(Op, Op0);
2433
2434 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2435
2436 // Since the sign extended bits are demanded, we know that the sign
2437 // bit is demanded.
2438 InputDemandedBits.setBit(ExVTBits - 1);
2439
2440 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2441 Depth + 1))
2442 return true;
2443
2444 // If the sign bit of the input is known set or clear, then we know the
2445 // top bits of the result.
2446
2447 // If the input sign bit is known zero, convert this into a zero extension.
2448 if (Known.Zero[ExVTBits - 1])
2449 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2450
2451 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2452 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2453 Known.One.setBitsFrom(ExVTBits);
2454 Known.Zero &= Mask;
2455 } else { // Input sign bit unknown
2456 Known.Zero &= Mask;
2457 Known.One &= Mask;
2458 }
2459 break;
2460 }
2461 case ISD::BUILD_PAIR: {
2462 EVT HalfVT = Op.getOperand(0).getValueType();
2463 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2464
2465 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2466 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2467
2468 KnownBits KnownLo, KnownHi;
2469
2470 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2471 return true;
2472
2473 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2474 return true;
2475
2476 Known = KnownHi.concat(KnownLo);
2477 break;
2478 }
2479 case ISD::ZERO_EXTEND_VECTOR_INREG:
2480 if (VT.isScalableVector())
2481 return false;
2482 [[fallthrough]];
2483 case ISD::ZERO_EXTEND: {
2484 SDValue Src = Op.getOperand(0);
2485 EVT SrcVT = Src.getValueType();
2486 unsigned InBits = SrcVT.getScalarSizeInBits();
2487 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2488 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2489
2490 // If none of the top bits are demanded, convert this into an any_extend.
2491 if (DemandedBits.getActiveBits() <= InBits) {
2492 // If we only need the non-extended bits of the bottom element
2493 // then we can just bitcast to the result.
2494 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2495 VT.getSizeInBits() == SrcVT.getSizeInBits())
2496 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2497
2498 unsigned Opc =
2499 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2500 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2501 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2502 }
2503
2504 APInt InDemandedBits = DemandedBits.trunc(InBits);
2505 APInt InDemandedElts = DemandedElts.zext(InElts);
2506 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2507 Depth + 1)) {
2508 Op->dropFlags(SDNodeFlags::NonNeg);
2509 return true;
2510 }
2511 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2512 Known = Known.zext(BitWidth);
2513
2514 // Attempt to avoid multi-use ops if we don't need anything from them.
2515 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2516 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2517 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2518 break;
2519 }
2520 case ISD::SIGN_EXTEND_VECTOR_INREG:
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::SIGN_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2530
2531 APInt InDemandedElts = DemandedElts.zext(InElts);
2532 APInt InDemandedBits = DemandedBits.trunc(InBits);
2533
2534 // Since some of the sign extended bits are demanded, we know that the sign
2535 // bit is demanded.
2536 InDemandedBits.setBit(InBits - 1);
2537
2538 // If none of the top bits are demanded, convert this into an any_extend.
2539 if (DemandedBits.getActiveBits() <= InBits) {
2540 // If we only need the non-extended bits of the bottom element
2541 // then we can just bitcast to the result.
2542 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2543 VT.getSizeInBits() == SrcVT.getSizeInBits())
2544 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2545
2546 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2547 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2548 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2549 InBits) {
2550 unsigned Opc =
2551 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2552 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2553 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2554 }
2555 }
2556
2557 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2558 Depth + 1))
2559 return true;
2560 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2561
2562 // If the sign bit is known one, the top bits match.
2563 Known = Known.sext(BitWidth);
2564
2565 // If the sign bit is known zero, convert this to a zero extend.
2566 if (Known.isNonNegative()) {
2567 unsigned Opc =
2568 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2569 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2570 SDNodeFlags Flags;
2571 if (!IsVecInReg)
2572 Flags |= SDNodeFlags::NonNeg;
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2574 }
2575 }
2576
2577 // Attempt to avoid multi-use ops if we don't need anything from them.
2578 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2579 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2580 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2581 break;
2582 }
2583 case ISD::ANY_EXTEND_VECTOR_INREG:
2584 if (VT.isScalableVector())
2585 return false;
2586 [[fallthrough]];
2587 case ISD::ANY_EXTEND: {
2588 SDValue Src = Op.getOperand(0);
2589 EVT SrcVT = Src.getValueType();
2590 unsigned InBits = SrcVT.getScalarSizeInBits();
2591 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2592 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2593
2594 // If we only need the bottom element then we can just bitcast.
2595 // TODO: Handle ANY_EXTEND?
2596 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2597 VT.getSizeInBits() == SrcVT.getSizeInBits())
2598 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2599
2600 APInt InDemandedBits = DemandedBits.trunc(InBits);
2601 APInt InDemandedElts = DemandedElts.zext(InElts);
2602 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2603 Depth + 1))
2604 return true;
2605 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2606 Known = Known.anyext(BitWidth);
2607
2608 // Attempt to avoid multi-use ops if we don't need anything from them.
2609 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2610 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2611 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2612 break;
2613 }
2614 case ISD::TRUNCATE: {
2615 SDValue Src = Op.getOperand(0);
2616
2617 // Simplify the input, using demanded bit information, and compute the known
2618 // zero/one bits live out.
2619 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2620 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2621 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2622 Depth + 1)) {
2623 // Disable the nsw and nuw flags. We can no longer guarantee that we
2624 // won't wrap after simplification.
2625 Op->dropFlags(SDNodeFlags::NoWrap);
2626 return true;
2627 }
2628 Known = Known.trunc(BitWidth);
2629
2630 // Attempt to avoid multi-use ops if we don't need anything from them.
2631 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2632 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2633 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2634
2635 // If the input is only used by this truncate, see if we can shrink it based
2636 // on the known demanded bits.
2637 switch (Src.getOpcode()) {
2638 default:
2639 break;
2640 case ISD::SRL:
2641 // Shrink SRL by a constant if none of the high bits shifted in are
2642 // demanded.
2643 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2644 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2645 // undesirable.
2646 break;
2647
2648 if (Src.getNode()->hasOneUse()) {
2649 if (isTruncateFree(Src, VT) &&
2650 !isTruncateFree(Src.getValueType(), VT)) {
2651 // If the truncate is only free at trunc(srl), do not turn it into
2652 // srl(trunc). The check first verifies that the truncate is free at
2653 // Src's opcode (srl), then that the truncate is not merely a
2654 // sub-register reference. In testing, if both trunc(srl) and
2655 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2656 // trunc(srl)'s trunc is free, trunc(srl) is better.
2657 break;
2658 }
2659
2660 std::optional<uint64_t> ShAmtC =
2661 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2662 if (!ShAmtC || *ShAmtC >= BitWidth)
2663 break;
2664 uint64_t ShVal = *ShAmtC;
2665
2666 APInt HighBits =
2667 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2668 HighBits.lshrInPlace(ShVal);
2669 HighBits = HighBits.trunc(BitWidth);
2670 if (!(HighBits & DemandedBits)) {
2671 // None of the shifted in bits are needed. Add a truncate of the
2672 // shift input, then shift it.
2673 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2674 SDValue NewTrunc =
2675 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2676 return TLO.CombineTo(
2677 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2678 }
2679 }
2680 break;
2681 }
2682
2683 break;
2684 }
2685 case ISD::AssertZext: {
2686 // AssertZext demands all of the high bits, plus any of the low bits
2687 // demanded by its users.
2688 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2689 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2690 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2691 TLO, Depth + 1))
2692 return true;
2693
2694 Known.Zero |= ~InMask;
2695 Known.One &= (~Known.Zero);
2696 break;
2697 }
2698 case ISD::EXTRACT_VECTOR_ELT: {
2699 SDValue Src = Op.getOperand(0);
2700 SDValue Idx = Op.getOperand(1);
2701 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2702 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2703
2704 if (SrcEltCnt.isScalable())
2705 return false;
2706
2707 // Demand the bits from every vector element without a constant index.
2708 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2709 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2710 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2711 if (CIdx->getAPIntValue().ult(NumSrcElts))
2712 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2713
2714 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2715 // anything about the extended bits.
2716 APInt DemandedSrcBits = DemandedBits;
2717 if (BitWidth > EltBitWidth)
2718 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2719
2720 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2721 Depth + 1))
2722 return true;
2723
2724 // Attempt to avoid multi-use ops if we don't need anything from them.
2725 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2726 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2727 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2728 SDValue NewOp =
2729 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2730 return TLO.CombineTo(Op, NewOp);
2731 }
2732 }
2733
2734 Known = Known2;
2735 if (BitWidth > EltBitWidth)
2736 Known = Known.anyext(BitWidth);
2737 break;
2738 }
2739 case ISD::BITCAST: {
2740 if (VT.isScalableVector())
2741 return false;
2742 SDValue Src = Op.getOperand(0);
2743 EVT SrcVT = Src.getValueType();
2744 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2745
2746 // If this is an FP->Int bitcast and if the sign bit is the only
2747 // thing demanded, turn this into a FGETSIGN.
2748 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2749 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2750 SrcVT.isFloatingPoint()) {
2751 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2752 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2753 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2754 SrcVT != MVT::f128) {
2755 // Cannot eliminate/lower SHL for f128 yet.
2756 EVT Ty = OpVTLegal ? VT : MVT::i32;
2757 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2758 // place. We expect the SHL to be eliminated by other optimizations.
2759 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2760 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2761 if (!OpVTLegal && OpVTSizeInBits > 32)
2762 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2763 unsigned ShVal = Op.getValueSizeInBits() - 1;
2764 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2765 return TLO.CombineTo(Op,
2766 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2767 }
2768 }
2769
2770 // Bitcast from a vector using SimplifyDemandedBits/VectorElts.
2771 // Demand the elt/bit if any of the original elts/bits are demanded.
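// e.g. bitcasting v8i8 to v2i32 (little endian): if bits [15:8] of i32
// element 0 are demanded, byte lane 1 of the source supplies them, so
// source element 0 * 4 + 1 is marked demanded.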
2772 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2773 unsigned Scale = BitWidth / NumSrcEltBits;
2774 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2775 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2776 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2777 for (unsigned i = 0; i != Scale; ++i) {
2778 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2779 unsigned BitOffset = EltOffset * NumSrcEltBits;
2780 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2781 if (!Sub.isZero()) {
2782 DemandedSrcBits |= Sub;
2783 for (unsigned j = 0; j != NumElts; ++j)
2784 if (DemandedElts[j])
2785 DemandedSrcElts.setBit((j * Scale) + i);
2786 }
2787 }
2788
2789 APInt KnownSrcUndef, KnownSrcZero;
2790 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2791 KnownSrcZero, TLO, Depth + 1))
2792 return true;
2793
2794 KnownBits KnownSrcBits;
2795 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2796 KnownSrcBits, TLO, Depth + 1))
2797 return true;
2798 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2799 // TODO - bigendian once we have test coverage.
2800 unsigned Scale = NumSrcEltBits / BitWidth;
2801 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2802 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2803 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2804 for (unsigned i = 0; i != NumElts; ++i)
2805 if (DemandedElts[i]) {
2806 unsigned Offset = (i % Scale) * BitWidth;
2807 DemandedSrcBits.insertBits(DemandedBits, Offset);
2808 DemandedSrcElts.setBit(i / Scale);
2809 }
2810
2811 if (SrcVT.isVector()) {
2812 APInt KnownSrcUndef, KnownSrcZero;
2813 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2814 KnownSrcZero, TLO, Depth + 1))
2815 return true;
2816 }
2817
2818 KnownBits KnownSrcBits;
2819 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2820 KnownSrcBits, TLO, Depth + 1))
2821 return true;
2822
2823 // Attempt to avoid multi-use ops if we don't need anything from them.
2824 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2825 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2826 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2827 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2828 return TLO.CombineTo(Op, NewOp);
2829 }
2830 }
2831 }
2832
2833 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2834 // recursive call where Known may be useful to the caller.
2835 if (Depth > 0) {
2836 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2837 return false;
2838 }
2839 break;
2840 }
2841 case ISD::MUL:
2842 if (DemandedBits.isPowerOf2()) {
2843 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2844 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2845 // odd (has LSB set), then the left-shifted low bit of X is the answer.
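// e.g. demanding only bit 3 of (X * 24): 24 == 3 << 3 with 3 odd, so bit
// 3 of the product equals bit 0 of X and (shl X, 3) gives the same bit.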
2846 unsigned CTZ = DemandedBits.countr_zero();
2847 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2848 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2849 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2850 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2851 return TLO.CombineTo(Op, Shl);
2852 }
2853 }
2854 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2855 // X * X is odd iff X is odd.
2856 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
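// Since (2a + b)^2 == 4(a^2 + ab) + b^2, X * X mod 4 equals X mod 2, so
// bit 1 is always zero and bit 0 is X[0]; (and X, 1) matches both.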
2857 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2858 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2859 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2860 return TLO.CombineTo(Op, And1);
2861 }
2862 [[fallthrough]];
2863 case ISD::ADD:
2864 case ISD::SUB: {
2865 // Add, Sub, and Mul don't demand any bits in positions beyond that
2866 // of the highest bit demanded of them.
2867 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2868 SDNodeFlags Flags = Op.getNode()->getFlags();
2869 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2870 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2871 KnownBits KnownOp0, KnownOp1;
2872 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2873 const KnownBits &KnownRHS) {
2874 if (Op.getOpcode() == ISD::MUL)
2875 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2876 return Demanded;
2877 };
2878 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2879 Depth + 1) ||
2880 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2881 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2882 // See if the operation should be performed at a smaller bit width.
2883 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2884 // Disable the nsw and nuw flags. We can no longer guarantee that we
2885 // won't wrap after simplification.
2886 Op->dropFlags(SDNodeFlags::NoWrap);
2887 return true;
2888 }
2889
2890 // neg x with only low bit demanded is simply x.
2891 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2892 isNullConstant(Op0))
2893 return TLO.CombineTo(Op, Op1);
2894
2895 // Attempt to avoid multi-use ops if we don't need anything from them.
2896 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2897 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2898 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2899 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2900 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2901 if (DemandedOp0 || DemandedOp1) {
2902 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2903 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2904 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2905 Flags & ~SDNodeFlags::NoWrap);
2906 return TLO.CombineTo(Op, NewOp);
2907 }
2908 }
2909
2910 // If we have a constant operand, we may be able to turn it into -1 if we
2911 // do not demand the high bits. This can make the constant smaller to
2912 // encode, allow more general folding, or match specialized instruction
2913 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2914 // is probably not useful (and could be detrimental).
2915 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2916 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2917 if (C && !C->isAllOnes() && !C->isOne() &&
2918 (C->getAPIntValue() | HighMask).isAllOnes()) {
2919 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2920 // Disable the nsw and nuw flags. We can no longer guarantee that we
2921 // won't wrap after simplification.
2922 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2923 Flags & ~SDNodeFlags::NoWrap);
2924 return TLO.CombineTo(Op, NewOp);
2925 }
2926
2927 // Match a multiply with a disguised negated-power-of-2 and convert to
2928 // an equivalent shift-left amount.
2929 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
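// e.g. if MulC becomes -8 once the undemanded high bits are set, then
// (X * MulC) + Op1 rewrites to Op1 - (shl X, 3).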
2930 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2931 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2932 return 0;
2933
2934 // Don't touch opaque constants. Also, ignore zero and power-of-2
2935 // multiplies. Those will get folded later.
2936 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2937 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2938 !MulC->getAPIntValue().isPowerOf2()) {
2939 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2940 if (UnmaskedC.isNegatedPowerOf2())
2941 return (-UnmaskedC).logBase2();
2942 }
2943 return 0;
2944 };
2945
2946 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2947 unsigned ShlAmt) {
2948 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2949 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2950 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2951 return TLO.CombineTo(Op, Res);
2952 };
2953
2954 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2955 if (Op.getOpcode() == ISD::ADD) {
2956 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2957 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2958 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2959 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2960 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2961 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2962 }
2963 if (Op.getOpcode() == ISD::SUB) {
2964 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2965 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2966 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2967 }
2968 }
2969
2970 if (Op.getOpcode() == ISD::MUL) {
2971 Known = KnownBits::mul(KnownOp0, KnownOp1);
2972 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2973 Known = KnownBits::computeForAddSub(
2974 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2975 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2976 }
2977 break;
2978 }
2979 case ISD::FABS: {
2980 SDValue Op0 = Op.getOperand(0);
2981 APInt SignMask = APInt::getSignMask(BitWidth);
2982
2983 if (!DemandedBits.intersects(SignMask))
2984 return TLO.CombineTo(Op, Op0);
2985
2986 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2987 Depth + 1))
2988 return true;
2989
2990 if (Known.isNonNegative())
2991 return TLO.CombineTo(Op, Op0);
2992 if (Known.isNegative())
2993 return TLO.CombineTo(
2994 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
2995
2996 Known.Zero |= SignMask;
2997 Known.One &= ~SignMask;
2998
2999 break;
3000 }
3001 case ISD::FCOPYSIGN: {
3002 SDValue Op0 = Op.getOperand(0);
3003 SDValue Op1 = Op.getOperand(1);
3004
3005 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3006 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3007 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3008 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3009
3010 if (!DemandedBits.intersects(SignMask0))
3011 return TLO.CombineTo(Op, Op0);
3012
3013 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3014 Known, TLO, Depth + 1) ||
3015 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3016 Depth + 1))
3017 return true;
3018
3019 if (Known2.isNonNegative())
3020 return TLO.CombineTo(
3021 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3022
3023 if (Known2.isNegative())
3024 return TLO.CombineTo(
3025 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3026 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3027
3028 Known.Zero &= ~SignMask0;
3029 Known.One &= ~SignMask0;
3030 break;
3031 }
3032 case ISD::FNEG: {
3033 SDValue Op0 = Op.getOperand(0);
3034 APInt SignMask = APInt::getSignMask(BitWidth);
3035
3036 if (!DemandedBits.intersects(SignMask))
3037 return TLO.CombineTo(Op, Op0);
3038
3039 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3040 Depth + 1))
3041 return true;
3042
3043 if (!Known.isSignUnknown()) {
3044 Known.Zero ^= SignMask;
3045 Known.One ^= SignMask;
3046 }
3047
3048 break;
3049 }
3050 default:
3051 // We also ask the target about intrinsics (which could be specific to it).
3052 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3053 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3054 // TODO: Probably okay to remove after audit; here to reduce change size
3055 // in initial enablement patch for scalable vectors
3056 if (Op.getValueType().isScalableVector())
3057 break;
3058 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3059 Known, TLO, Depth))
3060 return true;
3061 break;
3062 }
3063
3064 // Just use computeKnownBits to compute output bits.
3065 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3066 break;
3067 }
3068
3069 // If we know the value of all of the demanded bits, return this as a
3070 // constant.
3071 if (!isTargetCanonicalConstantNode(Op) &&
3072 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3073 // Avoid folding to a constant if any OpaqueConstant is involved.
3074 if (llvm::any_of(Op->ops(), [](SDValue V) {
3075 auto *C = dyn_cast<ConstantSDNode>(V);
3076 return C && C->isOpaque();
3077 }))
3078 return false;
3079 if (VT.isInteger())
3080 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3081 if (VT.isFloatingPoint())
3082 return TLO.CombineTo(
3083 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3084 dl, VT));
3085 }
3086
3087 // A multi-use 'all demanded elts' simplify failed to find any knownbits.
3088 // Try again just for the original demanded elts.
3089 // Ensure we do this AFTER constant folding above.
3090 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3091 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3092
3093 return false;
3094}
3095
3096 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3097 const APInt &DemandedElts,
3098 DAGCombinerInfo &DCI) const {
3099 SelectionDAG &DAG = DCI.DAG;
3100 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3101 !DCI.isBeforeLegalizeOps());
3102
3103 APInt KnownUndef, KnownZero;
3104 bool Simplified =
3105 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3106 if (Simplified) {
3107 DCI.AddToWorklist(Op.getNode());
3108 DCI.CommitTargetLoweringOpt(TLO);
3109 }
3110
3111 return Simplified;
3112}
3113
3114/// Given a vector binary operation and known undefined elements for each input
3115/// operand, compute whether each element of the output is undefined.
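/// Worked example (editorial annotation): for BO = sub <2 x i32> %a, %b with
/// UndefOp0 = 0b10 and UndefOp1 = 0b10, element 1 constant-folds as
/// sub(undef, undef) -> undef, so bit 1 of the result is set; element 0 has
/// no undef/constant operands and stays unset.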
3116static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3117 const APInt &UndefOp0,
3118 const APInt &UndefOp1) {
3119 EVT VT = BO.getValueType();
3120 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3121 "Vector binop only");
3122
3123 EVT EltVT = VT.getVectorElementType();
3124 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3125 assert(UndefOp0.getBitWidth() == NumElts &&
3126 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3127
3128 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3129 const APInt &UndefVals) {
3130 if (UndefVals[Index])
3131 return DAG.getUNDEF(EltVT);
3132
3133 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3134 // Try hard to make sure that the getNode() call is not creating temporary
3135 // nodes. Ignore opaque integers because they do not constant fold.
3136 SDValue Elt = BV->getOperand(Index);
3137 auto *C = dyn_cast<ConstantSDNode>(Elt);
3138 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3139 return Elt;
3140 }
3141
3142 return SDValue();
3143 };
3144
3145 APInt KnownUndef = APInt::getZero(NumElts);
3146 for (unsigned i = 0; i != NumElts; ++i) {
3147 // If both inputs for this element are either constant or undef and match
3148 // the element type, compute the constant/undef result for this element of
3149 // the vector.
3150 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3151 // not handle FP constants. The code within getNode() should be refactored
3152 // to avoid the danger of creating a bogus temporary node here.
3153 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3154 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3155 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3156 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3157 KnownUndef.setBit(i);
3158 }
3159 return KnownUndef;
3160}
3161
3162bool TargetLowering::SimplifyDemandedVectorElts(
3163 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3164 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3165 bool AssumeSingleUse) const {
3166 EVT VT = Op.getValueType();
3167 unsigned Opcode = Op.getOpcode();
3168 APInt DemandedElts = OriginalDemandedElts;
3169 unsigned NumElts = DemandedElts.getBitWidth();
3170 assert(VT.isVector() && "Expected vector op");
3171
3172 KnownUndef = KnownZero = APInt::getZero(NumElts);
3173
3174 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3175 return false;
3176
3177 // TODO: For now we assume we know nothing about scalable vectors.
3178 if (VT.isScalableVector())
3179 return false;
3180
3181 assert(VT.getVectorNumElements() == NumElts &&
3182 "Mask size mismatches value type element count!");
3183
3184 // Undef operand.
3185 if (Op.isUndef()) {
3186 KnownUndef.setAllBits();
3187 return false;
3188 }
3189
3190 // If Op has other users, assume that all elements are needed.
3191 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3192 DemandedElts.setAllBits();
3193
3194 // Not demanding any elements from Op.
3195 if (DemandedElts == 0) {
3196 KnownUndef.setAllBits();
3197 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3198 }
3199
3200 // Limit search depth.
3201 if (Depth >= SelectionDAG::MaxRecursionDepth)
3202 return false;
3203
3204 SDLoc DL(Op);
3205 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3206 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3207
3208 // Helper for demanding the specified elements and all the bits of both binary
3209 // operands.
3210 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3211 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3212 TLO.DAG, Depth + 1);
3213 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3214 TLO.DAG, Depth + 1);
3215 if (NewOp0 || NewOp1) {
3216 SDValue NewOp =
3217 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3218 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3219 return TLO.CombineTo(Op, NewOp);
3220 }
3221 return false;
3222 };
3223
3224 switch (Opcode) {
3225 case ISD::SCALAR_TO_VECTOR: {
3226 if (!DemandedElts[0]) {
3227 KnownUndef.setAllBits();
3228 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3229 }
3230 SDValue ScalarSrc = Op.getOperand(0);
3231 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3232 SDValue Src = ScalarSrc.getOperand(0);
3233 SDValue Idx = ScalarSrc.getOperand(1);
3234 EVT SrcVT = Src.getValueType();
3235
3236 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3237
3238 if (SrcEltCnt.isScalable())
3239 return false;
3240
3241 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3242 if (isNullConstant(Idx)) {
3243 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3244 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3245 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3246 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3247 TLO, Depth + 1))
3248 return true;
3249 }
3250 }
3251 KnownUndef.setHighBits(NumElts - 1);
3252 break;
3253 }
3254 case ISD::BITCAST: {
3255 SDValue Src = Op.getOperand(0);
3256 EVT SrcVT = Src.getValueType();
3257
3258 if (!SrcVT.isVector()) {
3259 // TODO - bigendian once we have test coverage.
3260 if (IsLE) {
3261 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3262 unsigned EltSize = VT.getScalarSizeInBits();
3263 for (unsigned I = 0; I != NumElts; ++I) {
3264 if (DemandedElts[I]) {
3265 unsigned Offset = I * EltSize;
3266 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3267 }
3268 }
3269 KnownBits Known;
3270 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3271 return true;
3272 }
3273 break;
3274 }
3275
3276 // Fast handling of 'identity' bitcasts.
3277 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3278 if (NumSrcElts == NumElts)
3279 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3280 KnownZero, TLO, Depth + 1);
3281
3282 APInt SrcDemandedElts, SrcZero, SrcUndef;
3283
3284 // Bitcast from a 'large element' src vector to a 'small element' vector: we
3285 // must demand a source element if any DemandedElt maps to it.
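// Worked example (editorial annotation): bitcasting v2i64 -> v4i32 on a
// little-endian target gives Scale == 2, so demanding v4i32 elements {2,3}
// demands v2i64 element 1, i.e. SrcDemandedElts == 0b10.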
3286 if ((NumElts % NumSrcElts) == 0) {
3287 unsigned Scale = NumElts / NumSrcElts;
3288 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3289 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3290 TLO, Depth + 1))
3291 return true;
3292
3293 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3294 // of the large element.
3295 // TODO - bigendian once we have test coverage.
3296 if (IsLE) {
3297 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3298 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3299 for (unsigned i = 0; i != NumElts; ++i)
3300 if (DemandedElts[i]) {
3301 unsigned Ofs = (i % Scale) * EltSizeInBits;
3302 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3303 }
3304
3305 KnownBits Known;
3306 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3307 TLO, Depth + 1))
3308 return true;
3309
3310 // The bitcast has split each wide element into a number of
3311 // narrow subelements. We have just computed the Known bits
3312 // for wide elements. See if element splitting results in
3313 // some subelements being zero. Only for demanded elements!
3314 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3315 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3316 .isAllOnes())
3317 continue;
3318 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3319 unsigned Elt = Scale * SrcElt + SubElt;
3320 if (DemandedElts[Elt])
3321 KnownZero.setBit(Elt);
3322 }
3323 }
3324 }
3325
3326 // If the src element is zero/undef then all the output elements covering it
3327 // will be too - only demanded elements are guaranteed to be correct.
3328 for (unsigned i = 0; i != NumSrcElts; ++i) {
3329 if (SrcDemandedElts[i]) {
3330 if (SrcZero[i])
3331 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3332 if (SrcUndef[i])
3333 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3334 }
3335 }
3336 }
3337
3338 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3339 // demand all the smaller source elements covered by the larger demanded element
3340 // of this vector.
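// Worked example (editorial annotation): bitcasting v4i32 -> v2i64 gives
// Scale == 2, so demanding i64 element 0 demands i32 source elements {0,1},
// i.e. SrcDemandedElts == 0b0011.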
3341 if ((NumSrcElts % NumElts) == 0) {
3342 unsigned Scale = NumSrcElts / NumElts;
3343 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3344 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3345 TLO, Depth + 1))
3346 return true;
3347
3348 // If all the src elements covering an output element are zero/undef, then
3349 // the output element will be as well, assuming it was demanded.
3350 for (unsigned i = 0; i != NumElts; ++i) {
3351 if (DemandedElts[i]) {
3352 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3353 KnownZero.setBit(i);
3354 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3355 KnownUndef.setBit(i);
3356 }
3357 }
3358 }
3359 break;
3360 }
3361 case ISD::FREEZE: {
3362 SDValue N0 = Op.getOperand(0);
3363 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3364 /*PoisonOnly=*/false,
3365 Depth + 1))
3366 return TLO.CombineTo(Op, N0);
3367
3368 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3369 // freeze(op(x, ...)) -> op(freeze(x), ...).
3370 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3371 return TLO.CombineTo(
3372 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3373 TLO.DAG.getFreeze(N0.getOperand(0))));
3374 break;
3375 }
3376 case ISD::BUILD_VECTOR: {
3377 // Check all elements and simplify any unused elements with UNDEF.
3378 if (!DemandedElts.isAllOnes()) {
3379 // Don't simplify BROADCASTS.
3380 if (llvm::any_of(Op->op_values(),
3381 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3382 SmallVector<SDValue, 32> Ops(Op->ops());
3383 bool Updated = false;
3384 for (unsigned i = 0; i != NumElts; ++i) {
3385 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3386 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3387 KnownUndef.setBit(i);
3388 Updated = true;
3389 }
3390 }
3391 if (Updated)
3392 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3393 }
3394 }
3395 for (unsigned i = 0; i != NumElts; ++i) {
3396 SDValue SrcOp = Op.getOperand(i);
3397 if (SrcOp.isUndef()) {
3398 KnownUndef.setBit(i);
3399 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3400 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3401 KnownZero.setBit(i);
3402 }
3403 }
3404 break;
3405 }
3406 case ISD::CONCAT_VECTORS: {
3407 EVT SubVT = Op.getOperand(0).getValueType();
3408 unsigned NumSubVecs = Op.getNumOperands();
3409 unsigned NumSubElts = SubVT.getVectorNumElements();
3410 for (unsigned i = 0; i != NumSubVecs; ++i) {
3411 SDValue SubOp = Op.getOperand(i);
3412 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3413 APInt SubUndef, SubZero;
3414 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3415 Depth + 1))
3416 return true;
3417 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3418 KnownZero.insertBits(SubZero, i * NumSubElts);
3419 }
3420
3421 // Attempt to avoid multi-use ops if we don't need anything from them.
3422 if (!DemandedElts.isAllOnes()) {
3423 bool FoundNewSub = false;
3424 SmallVector<SDValue, 2> DemandedSubOps;
3425 for (unsigned i = 0; i != NumSubVecs; ++i) {
3426 SDValue SubOp = Op.getOperand(i);
3427 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3428 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3429 SubOp, SubElts, TLO.DAG, Depth + 1);
3430 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3431 FoundNewSub = NewSubOp ? true : FoundNewSub;
3432 }
3433 if (FoundNewSub) {
3434 SDValue NewOp =
3435 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3436 return TLO.CombineTo(Op, NewOp);
3437 }
3438 }
3439 break;
3440 }
3441 case ISD::INSERT_SUBVECTOR: {
3442 // Demand any elements from the subvector and the remainder from the src it's
3443 // inserted into.
3444 SDValue Src = Op.getOperand(0);
3445 SDValue Sub = Op.getOperand(1);
3446 uint64_t Idx = Op.getConstantOperandVal(2);
3447 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3448 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3449 APInt DemandedSrcElts = DemandedElts;
3450 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3451
3452 APInt SubUndef, SubZero;
3453 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3454 Depth + 1))
3455 return true;
3456
3457 // If none of the src operand elements are demanded, replace it with undef.
3458 if (!DemandedSrcElts && !Src.isUndef())
3459 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3460 TLO.DAG.getUNDEF(VT), Sub,
3461 Op.getOperand(2)));
3462
3463 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3464 TLO, Depth + 1))
3465 return true;
3466 KnownUndef.insertBits(SubUndef, Idx);
3467 KnownZero.insertBits(SubZero, Idx);
3468
3469 // Attempt to avoid multi-use ops if we don't need anything from them.
3470 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3471 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3472 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3473 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3474 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3475 if (NewSrc || NewSub) {
3476 NewSrc = NewSrc ? NewSrc : Src;
3477 NewSub = NewSub ? NewSub : Sub;
3478 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3479 NewSub, Op.getOperand(2));
3480 return TLO.CombineTo(Op, NewOp);
3481 }
3482 }
3483 break;
3484 }
3485 case ISD::EXTRACT_SUBVECTOR: {
3486 // Offset the demanded elts by the subvector index.
3487 SDValue Src = Op.getOperand(0);
3488 if (Src.getValueType().isScalableVector())
3489 break;
3490 uint64_t Idx = Op.getConstantOperandVal(1);
3491 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3492 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3493
3494 APInt SrcUndef, SrcZero;
3495 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3496 Depth + 1))
3497 return true;
3498 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3499 KnownZero = SrcZero.extractBits(NumElts, Idx);
3500
3501 // Attempt to avoid multi-use ops if we don't need anything from them.
3502 if (!DemandedElts.isAllOnes()) {
3503 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3504 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3505 if (NewSrc) {
3506 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3507 Op.getOperand(1));
3508 return TLO.CombineTo(Op, NewOp);
3509 }
3510 }
3511 break;
3512 }
3513 case ISD::INSERT_VECTOR_ELT: {
3514 SDValue Vec = Op.getOperand(0);
3515 SDValue Scl = Op.getOperand(1);
3516 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3517
3518 // For a legal, constant insertion index, if we don't need this insertion
3519 // then strip it, else remove it from the demanded elts.
3520 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3521 unsigned Idx = CIdx->getZExtValue();
3522 if (!DemandedElts[Idx])
3523 return TLO.CombineTo(Op, Vec);
3524
3525 APInt DemandedVecElts(DemandedElts);
3526 DemandedVecElts.clearBit(Idx);
3527 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3528 KnownZero, TLO, Depth + 1))
3529 return true;
3530
3531 KnownUndef.setBitVal(Idx, Scl.isUndef());
3532
3533 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3534 break;
3535 }
3536
3537 APInt VecUndef, VecZero;
3538 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3539 Depth + 1))
3540 return true;
3541 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3542 break;
3543 }
3544 case ISD::VSELECT: {
3545 SDValue Sel = Op.getOperand(0);
3546 SDValue LHS = Op.getOperand(1);
3547 SDValue RHS = Op.getOperand(2);
3548
3549 // Try to transform the select condition based on the current demanded
3550 // elements.
3551 APInt UndefSel, ZeroSel;
3552 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3553 Depth + 1))
3554 return true;
3555
3556 // See if we can simplify either vselect operand.
3557 APInt DemandedLHS(DemandedElts);
3558 APInt DemandedRHS(DemandedElts);
3559 APInt UndefLHS, ZeroLHS;
3560 APInt UndefRHS, ZeroRHS;
3561 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3562 Depth + 1))
3563 return true;
3564 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3565 Depth + 1))
3566 return true;
3567
3568 KnownUndef = UndefLHS & UndefRHS;
3569 KnownZero = ZeroLHS & ZeroRHS;
3570
3571 // If we know that the selected element is always zero, we don't need the
3572 // select value element.
3573 APInt DemandedSel = DemandedElts & ~KnownZero;
3574 if (DemandedSel != DemandedElts)
3575 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3576 Depth + 1))
3577 return true;
3578
3579 break;
3580 }
3581 case ISD::VECTOR_SHUFFLE: {
3582 SDValue LHS = Op.getOperand(0);
3583 SDValue RHS = Op.getOperand(1);
3584 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3585
3586 // Collect demanded elements from shuffle operands.
3587 APInt DemandedLHS(NumElts, 0);
3588 APInt DemandedRHS(NumElts, 0);
3589 for (unsigned i = 0; i != NumElts; ++i) {
3590 int M = ShuffleMask[i];
3591 if (M < 0 || !DemandedElts[i])
3592 continue;
3593 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3594 if (M < (int)NumElts)
3595 DemandedLHS.setBit(M);
3596 else
3597 DemandedRHS.setBit(M - NumElts);
3598 }
3599
3600 // If either side isn't demanded, replace it by UNDEF. We handle this
3601 // explicitly here to also simplify in case of multiple uses (in contrast
3602 // to the SimplifyDemandedVectorElts calls below).
3603 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3604 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3605 if (FoldLHS || FoldRHS) {
3606 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3607 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3608 SDValue NewOp =
3609 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3610 return TLO.CombineTo(Op, NewOp);
3611 }
3612
3613 // See if we can simplify either shuffle operand.
3614 APInt UndefLHS, ZeroLHS;
3615 APInt UndefRHS, ZeroRHS;
3616 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3617 Depth + 1))
3618 return true;
3619 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3620 Depth + 1))
3621 return true;
3622
3623 // Simplify mask using undef elements from LHS/RHS.
3624 bool Updated = false;
3625 bool IdentityLHS = true, IdentityRHS = true;
3626 SmallVector<int, 32> NewMask(ShuffleMask);
3627 for (unsigned i = 0; i != NumElts; ++i) {
3628 int &M = NewMask[i];
3629 if (M < 0)
3630 continue;
3631 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3632 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3633 Updated = true;
3634 M = -1;
3635 }
3636 IdentityLHS &= (M < 0) || (M == (int)i);
3637 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3638 }
3639
3640 // Update legal shuffle masks based on demanded elements if it won't reduce
3641 // to Identity, which can cause premature removal of the shuffle mask.
3642 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3643 SDValue LegalShuffle =
3644 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3645 if (LegalShuffle)
3646 return TLO.CombineTo(Op, LegalShuffle);
3647 }
3648
3649 // Propagate undef/zero elements from LHS/RHS.
3650 for (unsigned i = 0; i != NumElts; ++i) {
3651 int M = ShuffleMask[i];
3652 if (M < 0) {
3653 KnownUndef.setBit(i);
3654 } else if (M < (int)NumElts) {
3655 if (UndefLHS[M])
3656 KnownUndef.setBit(i);
3657 if (ZeroLHS[M])
3658 KnownZero.setBit(i);
3659 } else {
3660 if (UndefRHS[M - NumElts])
3661 KnownUndef.setBit(i);
3662 if (ZeroRHS[M - NumElts])
3663 KnownZero.setBit(i);
3664 }
3665 }
3666 break;
3667 }
3668 case ISD::ANY_EXTEND_VECTOR_INREG:
3669 case ISD::SIGN_EXTEND_VECTOR_INREG:
3670 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3671 APInt SrcUndef, SrcZero;
3672 SDValue Src = Op.getOperand(0);
3673 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3674 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3675 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3676 Depth + 1))
3677 return true;
3678 KnownZero = SrcZero.zextOrTrunc(NumElts);
3679 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3680
3681 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3682 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3683 DemandedSrcElts == 1) {
3684 // aext - if we just need the bottom element then we can bitcast.
3685 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3686 }
3687
3688 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3689 // zext(undef) upper bits are guaranteed to be zero.
3690 if (DemandedElts.isSubsetOf(KnownUndef))
3691 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3692 KnownUndef.clearAllBits();
3693
3694 // zext - if we just need the bottom element then we can mask:
3695 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
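// Worked example (editorial annotation): for Op = zero_extend_vector_inreg
// (and %x, <c0,c1,c2,c3>) : v4i16 -> v2i32 with DemandedSrcElts == 1, the
// mask below folds the constant to <c0,0,0,0>, and on little-endian the
// zeroed upper lanes supply the zero-extended high half of the i32 element,
// so the whole node becomes bitcast (and %x, <c0,0,0,0>).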
3696 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3697 Op->isOnlyUserOf(Src.getNode()) &&
3698 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3699 SDLoc DL(Op);
3700 EVT SrcVT = Src.getValueType();
3701 EVT SrcSVT = SrcVT.getScalarType();
3702 SmallVector<SDValue> MaskElts;
3703 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3704 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3705 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3706 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3707 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3708 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3709 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3710 }
3711 }
3712 }
3713 break;
3714 }
3715
3716 // TODO: There are more binop opcodes that could be handled here - MIN,
3717 // MAX, saturated math, etc.
3718 case ISD::ADD: {
3719 SDValue Op0 = Op.getOperand(0);
3720 SDValue Op1 = Op.getOperand(1);
3721 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3722 APInt UndefLHS, ZeroLHS;
3723 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3724 Depth + 1, /*AssumeSingleUse*/ true))
3725 return true;
3726 }
3727 [[fallthrough]];
3728 }
3729 case ISD::AVGCEILS:
3730 case ISD::AVGCEILU:
3731 case ISD::AVGFLOORS:
3732 case ISD::AVGFLOORU:
3733 case ISD::OR:
3734 case ISD::XOR:
3735 case ISD::SUB:
3736 case ISD::FADD:
3737 case ISD::FSUB:
3738 case ISD::FMUL:
3739 case ISD::FDIV:
3740 case ISD::FREM: {
3741 SDValue Op0 = Op.getOperand(0);
3742 SDValue Op1 = Op.getOperand(1);
3743
3744 APInt UndefRHS, ZeroRHS;
3745 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3746 Depth + 1))
3747 return true;
3748 APInt UndefLHS, ZeroLHS;
3749 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3750 Depth + 1))
3751 return true;
3752
3753 KnownZero = ZeroLHS & ZeroRHS;
3754 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3755
3756 // Attempt to avoid multi-use ops if we don't need anything from them.
3757 // TODO - use KnownUndef to relax the demandedelts?
3758 if (!DemandedElts.isAllOnes())
3759 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3760 return true;
3761 break;
3762 }
3763 case ISD::SHL:
3764 case ISD::SRL:
3765 case ISD::SRA:
3766 case ISD::ROTL:
3767 case ISD::ROTR: {
3768 SDValue Op0 = Op.getOperand(0);
3769 SDValue Op1 = Op.getOperand(1);
3770
3771 APInt UndefRHS, ZeroRHS;
3772 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3773 Depth + 1))
3774 return true;
3775 APInt UndefLHS, ZeroLHS;
3776 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3777 Depth + 1))
3778 return true;
3779
3780 KnownZero = ZeroLHS;
3781 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3782
3783 // Attempt to avoid multi-use ops if we don't need anything from them.
3784 // TODO - use KnownUndef to relax the demandedelts?
3785 if (!DemandedElts.isAllOnes())
3786 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3787 return true;
3788 break;
3789 }
3790 case ISD::MUL:
3791 case ISD::MULHU:
3792 case ISD::MULHS:
3793 case ISD::AND: {
3794 SDValue Op0 = Op.getOperand(0);
3795 SDValue Op1 = Op.getOperand(1);
3796
3797 APInt SrcUndef, SrcZero;
3798 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3799 Depth + 1))
3800 return true;
3801 // If we know that a demanded element was zero in Op1 we don't need to
3802 // demand it in Op0 - it's guaranteed to be zero.
3803 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3804 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3805 TLO, Depth + 1))
3806 return true;
3807
3808 KnownUndef &= DemandedElts0;
3809 KnownZero &= DemandedElts0;
3810
3811 // If every element pair has a zero/undef then just fold to zero.
3812 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3813 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3814 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3815 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3816
3817 // If either side has a zero element, then the result element is zero, even
3818 // if the other is an UNDEF.
3819 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3820 // and then handle 'and' nodes with the rest of the binop opcodes.
3821 KnownZero |= SrcZero;
3822 KnownUndef &= SrcUndef;
3823 KnownUndef &= ~KnownZero;
3824
3825 // Attempt to avoid multi-use ops if we don't need anything from them.
3826 if (!DemandedElts.isAllOnes())
3827 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3828 return true;
3829 break;
3830 }
3831 case ISD::TRUNCATE:
3832 case ISD::SIGN_EXTEND:
3833 case ISD::ZERO_EXTEND:
3834 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3835 KnownZero, TLO, Depth + 1))
3836 return true;
3837
3838 if (!DemandedElts.isAllOnes())
3839 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3840 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3841 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3842
3843 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3844 // zext(undef) upper bits are guaranteed to be zero.
3845 if (DemandedElts.isSubsetOf(KnownUndef))
3846 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3847 KnownUndef.clearAllBits();
3848 }
3849 break;
3850 case ISD::SINT_TO_FP:
3851 case ISD::UINT_TO_FP:
3852 case ISD::FP_TO_SINT:
3853 case ISD::FP_TO_UINT:
3854 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3855 KnownZero, TLO, Depth + 1))
3856 return true;
3857 // Don't fall through to generic undef -> undef handling.
3858 return false;
3859 default: {
3860 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3861 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3862 KnownZero, TLO, Depth))
3863 return true;
3864 } else {
3865 KnownBits Known;
3866 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3867 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3868 TLO, Depth, AssumeSingleUse))
3869 return true;
3870 }
3871 break;
3872 }
3873 }
3874 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3875
3876 // Constant fold all undef cases.
3877 // TODO: Handle zero cases as well.
3878 if (DemandedElts.isSubsetOf(KnownUndef))
3879 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3880
3881 return false;
3882}
3883
3884/// Determine which of the bits specified in Mask are known to be either zero or
3885/// one and return them in the Known.
3886void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3887 KnownBits &Known,
3888 const APInt &DemandedElts,
3889 const SelectionDAG &DAG,
3890 unsigned Depth) const {
3891 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3892 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3893 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3894 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3895 "Should use MaskedValueIsZero if you don't know whether Op"
3896 " is a target node!");
3897 Known.resetAll();
3898}
3899
3900void TargetLowering::computeKnownBitsForTargetInstr(
3901 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3902 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3903 unsigned Depth) const {
3904 Known.resetAll();
3905}
3906
3907void TargetLowering::computeKnownFPClassForTargetInstr(
3908 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3909 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3910 unsigned Depth) const {
3911 Known.resetAll();
3912}
3913
3914void TargetLowering::computeKnownBitsForFrameIndex(
3915 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3916 // The low bits are known zero if the pointer is aligned.
3917 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3918}
3919
3920Align TargetLowering::computeKnownAlignForTargetInstr(
3921 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3922 unsigned Depth) const {
3923 return Align(1);
3924}
3925
3926/// This method can be implemented by targets that want to expose additional
3927/// information about sign bits to the DAG Combiner.
3928unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3929 const APInt &,
3930 const SelectionDAG &,
3931 unsigned Depth) const {
3932 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3933 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3934 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3935 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3936 "Should use ComputeNumSignBits if you don't know whether Op"
3937 " is a target node!");
3938 return 1;
3939}
3940
3941unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3942 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3943 const MachineRegisterInfo &MRI, unsigned Depth) const {
3944 return 1;
3945}
3946
3947bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3948 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3949 TargetLoweringOpt &TLO, unsigned Depth) const {
3950 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3951 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3952 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3953 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3954 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3955 " is a target node!");
3956 return false;
3957}
3958
3959bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3960 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3961 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3962 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3963 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3964 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3965 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3966 "Should use SimplifyDemandedBits if you don't know whether Op"
3967 " is a target node!");
3968 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3969 return false;
3970}
3971
3972SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3973 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3974 SelectionDAG &DAG, unsigned Depth) const {
3975 assert(
3976 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3977 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3978 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3979 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3980 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3981 " is a target node!");
3982 return SDValue();
3983}
3984
3985SDValue
3986TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3987 SDValue N1, MutableArrayRef<int> Mask,
3988 SelectionDAG &DAG) const {
3989 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3990 if (!LegalMask) {
3991 std::swap(N0, N1);
3992 ShuffleVectorSDNode::commuteMask(Mask);
3993 LegalMask = isShuffleMaskLegal(Mask, VT);
3994 }
3995
3996 if (!LegalMask)
3997 return SDValue();
3998
3999 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4000}
4001
4002const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
4003 return nullptr;
4004}
4005
4006bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4007 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4008 bool PoisonOnly, unsigned Depth) const {
4009 assert(
4010 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4011 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4012 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4013 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4014 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4015 " is a target node!");
4016
4017 // If Op can't create undef/poison and none of its operands are undef/poison
4018 // then Op is never undef/poison.
4019 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4020 /*ConsiderFlags*/ true, Depth) &&
4021 all_of(Op->ops(), [&](SDValue V) {
4022 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4023 Depth + 1);
4024 });
4025}
4026
4027bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4028 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4029 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4030 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4031 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4032 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4033 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4034 "Should use canCreateUndefOrPoison if you don't know whether Op"
4035 " is a target node!");
4036 // Be conservative and return true.
4037 return true;
4038}
4039
4040bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4041 const APInt &DemandedElts,
4042 const SelectionDAG &DAG,
4043 bool SNaN,
4044 unsigned Depth) const {
4045 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4046 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4047 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4048 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4049 "Should use isKnownNeverNaN if you don't know whether Op"
4050 " is a target node!");
4051 return false;
4052}
4053
4054bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4055 const APInt &DemandedElts,
4056 APInt &UndefElts,
4057 const SelectionDAG &DAG,
4058 unsigned Depth) const {
4059 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4060 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4061 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4062 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4063 "Should use isSplatValue if you don't know whether Op"
4064 " is a target node!");
4065 return false;
4066}
4067
4068// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4069// work with truncating build vectors and vectors with elements of less than
4070// 8 bits.
4071bool TargetLowering::isConstTrueVal(SDValue N) const {
4072 if (!N)
4073 return false;
4074
4075 unsigned EltWidth;
4076 APInt CVal;
4077 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4078 /*AllowTruncation=*/true)) {
4079 CVal = CN->getAPIntValue();
4080 EltWidth = N.getValueType().getScalarSizeInBits();
4081 } else
4082 return false;
4083
4084 // If this is a truncating splat, truncate the splat value.
4085 // Otherwise, we may fail to match the expected values below.
4086 if (EltWidth < CVal.getBitWidth())
4087 CVal = CVal.trunc(EltWidth);
4088
4089 switch (getBooleanContents(N.getValueType())) {
4090 case UndefinedBooleanContent:
4091 return CVal[0];
4092 case ZeroOrOneBooleanContent:
4093 return CVal.isOne();
4094 case ZeroOrNegativeOneBooleanContent:
4095 return CVal.isAllOnes();
4096 }
4097
4098 llvm_unreachable("Invalid boolean contents");
4099}
4100
4101bool TargetLowering::isConstFalseVal(SDValue N) const {
4102 if (!N)
4103 return false;
4104
4105 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4106 if (!CN) {
4107 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4108 if (!BV)
4109 return false;
4110
4111 // We are only interested in constant splats; we don't care about undef
4112 // elements when identifying boolean constants, and getConstantSplatNode
4113 // returns NULL if all ops are undef.
4114 CN = BV->getConstantSplatNode();
4115 if (!CN)
4116 return false;
4117 }
4118
4119 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4120 return !CN->getAPIntValue()[0];
4121
4122 return CN->isZero();
4123}
4124
4125bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4126 bool SExt) const {
4127 if (VT == MVT::i1)
4128 return N->isOne();
4129
4130 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4131 switch (Cnt) {
4132 case TargetLowering::ZeroOrOneBooleanContent:
4133 // An extended value of 1 is always true, unless its original type is i1,
4134 // in which case it will be sign extended to -1.
4135 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4136 case TargetLowering::UndefinedBooleanContent:
4137 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4138 return N->isAllOnes() && SExt;
4139 }
4140 llvm_unreachable("Unexpected enumeration.");
4141}
4142
4143/// This helper function of SimplifySetCC tries to optimize the comparison when
4144/// either operand of the SetCC node is a bitwise-and instruction.
4145SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4146 ISD::CondCode Cond, const SDLoc &DL,
4147 DAGCombinerInfo &DCI) const {
4148 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4149 std::swap(N0, N1);
4150
4151 SelectionDAG &DAG = DCI.DAG;
4152 EVT OpVT = N0.getValueType();
4153 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4154 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4155 return SDValue();
4156
4157 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4158 // iff everything but LSB is known zero:
4159 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4160 getBooleanContents(OpVT.getScalarType()) ==
4161 TargetLowering::ZeroOrOneBooleanContent) {
4162 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4163 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4164 if (DAG.MaskedValueIsZero(N0, UpperBits))
4165 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4166 }
4167
4168 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4169 // test in a narrow type that we can truncate to with no cost. Examples:
4170 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4171 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4172 // TODO: This conservatively checks for type legality on the source and
4173 // destination types. That may inhibit optimizations, but it also
4174 // allows setcc->shift transforms that may be more beneficial.
4175 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4176 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4177 isTypeLegal(OpVT) && N0.hasOneUse()) {
4178 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4179 AndC->getAPIntValue().getActiveBits());
4180 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4181 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4182 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4183 return DAG.getSetCC(DL, VT, Trunc, Zero,
4184 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4185 }
4186 }
4187
4188 // Match these patterns in any of their permutations:
4189 // (X & Y) == Y
4190 // (X & Y) != Y
4191 SDValue X, Y;
4192 if (N0.getOperand(0) == N1) {
4193 X = N0.getOperand(1);
4194 Y = N0.getOperand(0);
4195 } else if (N0.getOperand(1) == N1) {
4196 X = N0.getOperand(0);
4197 Y = N0.getOperand(1);
4198 } else {
4199 return SDValue();
4200 }
4201
4202 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4203 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4204 // it's liable to create an infinite loop.
4205 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4206 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4207 valueHasExactlyOneBitSet(Y, DAG)) {
4208 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4209 // Note that where Y is variable and is known to have at most one bit set
4210 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4211 // equivalent when Y == 0.
4212 assert(OpVT.isInteger());
4213 Cond = ISD::getSetCCInverse(Cond, OpVT);
4214 if (DCI.isBeforeLegalizeOps() ||
4215 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4216 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4217 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4218 // If the target supports an 'and-not' or 'and-complement' logic operation,
4219 // try to use that to make a comparison operation more efficient.
4220 // But don't do this transform if the mask is a single bit because there are
4221 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4222 // 'rlwinm' on PPC).
4223
4224 // Bail out if the compare operand that we want to turn into a zero is
4225 // already a zero (otherwise, infinite loop).
4226 if (isNullConstant(Y))
4227 return SDValue();
4228
4229 // Transform this into: ~X & Y == 0.
4230 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4231 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4232 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4233 }
4234
4235 return SDValue();
4236}
4237
4238/// This helper function of SimplifySetCC tries to optimize the comparison when
4239/// either operand of the SetCC node is a bitwise-or instruction.
4240/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4241SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4242 ISD::CondCode Cond, const SDLoc &DL,
4243 DAGCombinerInfo &DCI) const {
4244 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4245 std::swap(N0, N1);
4246
4247 SelectionDAG &DAG = DCI.DAG;
4248 EVT OpVT = N0.getValueType();
4249 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4250 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4251 return SDValue();
4252
4253 // (X | Y) == Y
4254 // (X | Y) != Y
4255 SDValue X;
4256 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4257 // If the target supports an 'and-not' or 'and-complement' logic operation,
4258 // try to use that to make a comparison operation more efficient.
4259
4260 // Bail out if the compare operand that we want to turn into a zero is
4261 // already a zero (otherwise, infinite loop).
4262 if (isNullConstant(N1))
4263 return SDValue();
4264
4265 // Transform this into: X & ~Y ==/!= 0.
4266 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4267 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4268 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4269 }
4270
4271 return SDValue();
4272}
4273
4274/// There are multiple IR patterns that could be checking whether certain
4275/// truncation of a signed number would be lossy or not. The pattern that is
4276/// best at the IR level may not lower optimally. Thus, we want to unfold it.
4277/// We are looking for the following pattern: (KeptBits is a constant)
4278/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4279/// KeptBits won't be bitwidth(x); that case would be constant-folded to true/false.
4280/// KeptBits also can't be 1; that case would have been folded to %x dstcond 0.
4281/// We will unfold it into the natural trunc+sext pattern:
4282/// ((%x << C) a>> C) dstcond %x
4283/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
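/// Worked example (editorial annotation): for i16 %x and KeptBits = 8,
/// (add %x, 128) u< 256 holds iff %x is in [-128, 127], which is exactly
/// sext_inreg(%x, i8) == %x, i.e. ((%x << 8) a>> 8) == %x with C = 8.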
4284SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4285 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4286 const SDLoc &DL) const {
4287 // We must be comparing with a constant.
4288 ConstantSDNode *C1;
4289 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4290 return SDValue();
4291
4292 // N0 should be: add %x, (1 << (KeptBits-1))
4293 if (N0->getOpcode() != ISD::ADD)
4294 return SDValue();
4295
4296 // And we must be 'add'ing a constant.
4297 ConstantSDNode *C01;
4298 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4299 return SDValue();
4300
4301 SDValue X = N0->getOperand(0);
4302 EVT XVT = X.getValueType();
4303
4304 // Validate constants ...
4305
4306 APInt I1 = C1->getAPIntValue();
4307
4308 ISD::CondCode NewCond;
4309 if (Cond == ISD::CondCode::SETULT) {
4310 NewCond = ISD::CondCode::SETEQ;
4311 } else if (Cond == ISD::CondCode::SETULE) {
4312 NewCond = ISD::CondCode::SETEQ;
4313 // But need to 'canonicalize' the constant.
4314 I1 += 1;
4315 } else if (Cond == ISD::CondCode::SETUGT) {
4316 NewCond = ISD::CondCode::SETNE;
4317 // But need to 'canonicalize' the constant.
4318 I1 += 1;
4319 } else if (Cond == ISD::CondCode::SETUGE) {
4320 NewCond = ISD::CondCode::SETNE;
4321 } else
4322 return SDValue();
4323
4324 APInt I01 = C01->getAPIntValue();
4325
4326 auto checkConstants = [&I1, &I01]() -> bool {
4327 // Both of them must be powers of two, and the constant from the setcc must be bigger.
4328 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4329 };
4330
4331 if (checkConstants()) {
4332 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4333 } else {
4334 // What if we invert constants? (and the target predicate)
4335 I1.negate();
4336 I01.negate();
4337 assert(XVT.isInteger());
4338 NewCond = getSetCCInverse(NewCond, XVT);
4339 if (!checkConstants())
4340 return SDValue();
4341 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4342 }
4343
4344 // They are power-of-two, so which bit is set?
4345 const unsigned KeptBits = I1.logBase2();
4346 const unsigned KeptBitsMinusOne = I01.logBase2();
4347
4348 // Magic!
4349 if (KeptBits != (KeptBitsMinusOne + 1))
4350 return SDValue();
4351 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4352
4353 // We don't want to do this in every single case.
4354 SelectionDAG &DAG = DCI.DAG;
4355 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4356 return SDValue();
4357
4358 // Unfold into: sext_inreg(%x) cond %x
4359 // Where 'cond' will be either 'eq' or 'ne'.
4360 SDValue SExtInReg = DAG.getNode(
4361 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4362 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4363 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4364}
4365
4366// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
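// Worked example (editorial annotation): (X & (0x80 l>> Y)) != 0 tests bit
// (7 - Y) of X, and so does ((X << Y) & 0x80) != 0; hoisting the constant
// out of the variable shift leaves the 'and' with an immediate mask.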
4367SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4368 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4369 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4371 "Should be a comparison with 0.");
4372 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4373 "Valid only for [in]equality comparisons.");
4374
4375 unsigned NewShiftOpcode;
4376 SDValue X, C, Y;
4377
4378 SelectionDAG &DAG = DCI.DAG;
4379
4380 // Look for '(C l>>/<< Y)'.
4381 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4382 // The shift should be one-use.
4383 if (!V.hasOneUse())
4384 return false;
4385 unsigned OldShiftOpcode = V.getOpcode();
4386 switch (OldShiftOpcode) {
4387 case ISD::SHL:
4388 NewShiftOpcode = ISD::SRL;
4389 break;
4390 case ISD::SRL:
4391 NewShiftOpcode = ISD::SHL;
4392 break;
4393 default:
4394 return false; // must be a logical shift.
4395 }
4396 // We should be shifting a constant.
4397 // FIXME: best to use isConstantOrConstantVector().
4398 C = V.getOperand(0);
4399 ConstantSDNode *CC =
4400 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4401 if (!CC)
4402 return false;
4403 Y = V.getOperand(1);
4404
4406 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4407 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4408 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4409 };
4410
4411 // The LHS of the comparison should be a one-use 'and'.
4412 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4413 return SDValue();
4414
4415 X = N0.getOperand(0);
4416 SDValue Mask = N0.getOperand(1);
4417
4418 // 'and' is commutative!
4419 if (!Match(Mask)) {
4420 std::swap(X, Mask);
4421 if (!Match(Mask))
4422 return SDValue();
4423 }
4424
4425 EVT VT = X.getValueType();
4426
4427 // Produce:
4428 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4429 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4430 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4431 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4432 return T2;
4433}
4434
4435/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4436/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4437/// handle the commuted versions of these patterns.
4438SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4439 ISD::CondCode Cond, const SDLoc &DL,
4440 DAGCombinerInfo &DCI) const {
4441 unsigned BOpcode = N0.getOpcode();
4442 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4443 "Unexpected binop");
4444 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4445
4446 // (X + Y) == X --> Y == 0
4447 // (X - Y) == X --> Y == 0
4448 // (X ^ Y) == X --> Y == 0
4449 SelectionDAG &DAG = DCI.DAG;
4450 EVT OpVT = N0.getValueType();
4451 SDValue X = N0.getOperand(0);
4452 SDValue Y = N0.getOperand(1);
4453 if (X == N1)
4454 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4455
4456 if (Y != N1)
4457 return SDValue();
4458
4459 // (X + Y) == Y --> X == 0
4460 // (X ^ Y) == Y --> X == 0
4461 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4462 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4463
4464 // The shift would not be valid if the operands are boolean (i1).
4465 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4466 return SDValue();
4467
4468 // (X - Y) == Y --> X == Y << 1
4469 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4470 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4471 if (!DCI.isCalledByLegalizer())
4472 DCI.AddToWorklist(YShl1.getNode());
4473 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4474}
4475
4476static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4477 SDValue N0, const APInt &C1,
4478 ISD::CondCode Cond, const SDLoc &dl,
4479 SelectionDAG &DAG) {
4480 // Look through truncs that don't change the value of a ctpop.
4481 // FIXME: Add vector support? Need to be careful with setcc result type below.
4482 SDValue CTPOP = N0;
4483 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4484 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4485 CTPOP = N0.getOperand(0);
4486
4487 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4488 return SDValue();
4489
4490 EVT CTVT = CTPOP.getValueType();
4491 SDValue CTOp = CTPOP.getOperand(0);
4492
4493 // Expand a power-of-2-or-zero comparison based on ctpop:
4494 // (ctpop x) u< 2 -> (x & x-1) == 0
4495 // (ctpop x) u> 1 -> (x & x-1) != 0
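// Worked example (editorial annotation): x & (x - 1) clears the lowest set
// bit, so for x = 0b01000 it yields 0 (at most one bit set), while for
// x = 0b01100 one pass leaves 0b01000 != 0 (more than one bit set).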
4496 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4497 // Keep the CTPOP if it is a cheap vector op.
4498 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4499 return SDValue();
4500
4501 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4502 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4503 return SDValue();
4504 if (C1 == 0 && (Cond == ISD::SETULT))
4505 return SDValue(); // This is handled elsewhere.
4506
4507 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4508
4509 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4510 SDValue Result = CTOp;
4511 for (unsigned i = 0; i < Passes; i++) {
4512 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4513 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4514 }
4515 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4516 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4517 }
4518
4519 // Expand a power-of-2 comparison based on ctpop
4520 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4521 // Keep the CTPOP if it is cheap.
4522 if (TLI.isCtpopFast(CTVT))
4523 return SDValue();
4524
4525 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4526 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4527 assert(CTVT.isInteger());
4528 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4529
4530 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4531 // check before emitting a potentially unnecessary op.
4532 if (DAG.isKnownNeverZero(CTOp)) {
4533 // (ctpop x) == 1 --> (x & x-1) == 0
4534 // (ctpop x) != 1 --> (x & x-1) != 0
4535 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4536 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4537 return RHS;
4538 }
4539
4540 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4541 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
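// Worked example (editorial annotation): for x = 0b01000, x ^ (x - 1) ==
// 0b01111 u> 0b00111 == x - 1, so ctpop(x) == 1; for x = 0, x - 1 is
// all-ones and x ^ (x - 1) == x - 1, so the u> test correctly fails.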
4542 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4543 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4544 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4545 }
4546
4547 return SDValue();
4548}
4549
4550static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4551 ISD::CondCode Cond, const SDLoc &dl,
4552 SelectionDAG &DAG) {
4553 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4554 return SDValue();
4555
4556 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4557 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4558 return SDValue();
4559
4560 auto getRotateSource = [](SDValue X) {
4561 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4562 return X.getOperand(0);
4563 return SDValue();
4564 };
4565
4566 // Peek through a rotated value compared against 0 or -1:
4567 // (rot X, Y) == 0/-1 --> X == 0/-1
4568 // (rot X, Y) != 0/-1 --> X != 0/-1
4569 if (SDValue R = getRotateSource(N0))
4570 return DAG.getSetCC(dl, VT, R, N1, Cond);
4571
4572 // Peek through an 'or' of a rotated value compared against 0:
4573 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4574 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4575 //
4576 // TODO: Add the 'and' with -1 sibling.
4577 // TODO: Recurse through a series of 'or' ops to find the rotate.
4578 EVT OpVT = N0.getValueType();
4579 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4580 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4581 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4582 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4583 }
4584 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4585 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4586 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4587 }
4588 }
4589
4590 return SDValue();
4591}
4592
4593static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4594 ISD::CondCode Cond, const SDLoc &dl,
4595 SelectionDAG &DAG) {
4596 // If we are testing for all-bits-clear, we might be able to do that with
4597 // less shifting since bit-order does not matter.
4598 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4599 return SDValue();
4600
4601 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4602 if (!C1 || !C1->isZero())
4603 return SDValue();
4604
4605 if (!N0.hasOneUse() ||
4606 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4607 return SDValue();
4608
4609 unsigned BitWidth = N0.getScalarValueSizeInBits();
4610 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4611 if (!ShAmtC)
4612 return SDValue();
4613
4614 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4615 if (ShAmt == 0)
4616 return SDValue();
4617
4618 // Canonicalize fshr as fshl to reduce pattern-matching.
4619 if (N0.getOpcode() == ISD::FSHR)
4620 ShAmt = BitWidth - ShAmt;
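// Editorial note: this relies on fshr(a, b, C) producing the same bits as
// fshl(a, b, BW - C) for C != 0, so only the fshl form needs to be matched
// below.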
4621
4622 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4623 SDValue X, Y;
4624 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4625 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4626 return false;
4627 if (Or.getOperand(0) == Other) {
4628 X = Or.getOperand(0);
4629 Y = Or.getOperand(1);
4630 return true;
4631 }
4632 if (Or.getOperand(1) == Other) {
4633 X = Or.getOperand(1);
4634 Y = Or.getOperand(0);
4635 return true;
4636 }
4637 return false;
4638 };
4639
4640 EVT OpVT = N0.getValueType();
4641 EVT ShAmtVT = N0.getOperand(2).getValueType();
4642 SDValue F0 = N0.getOperand(0);
4643 SDValue F1 = N0.getOperand(1);
4644 if (matchOr(F0, F1)) {
4645 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4646 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4647 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4648 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4649 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4650 }
4651 if (matchOr(F1, F0)) {
4652 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4653 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4654 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4655 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4656 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4657 }
4658
4659 return SDValue();
4660}
4661
4662/// Try to simplify a setcc built with the specified operands and cc. If it is
4663/// unable to simplify it, return a null SDValue.
4664SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4665 ISD::CondCode Cond, bool foldBooleans,
4666 DAGCombinerInfo &DCI,
4667 const SDLoc &dl) const {
4668 SelectionDAG &DAG = DCI.DAG;
4669 const DataLayout &Layout = DAG.getDataLayout();
4670 EVT OpVT = N0.getValueType();
4671 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4672
4673 // Constant fold or commute setcc.
4674 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4675 return Fold;
4676
4677 bool N0ConstOrSplat =
4678 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4679 bool N1ConstOrSplat =
4680 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4681
4682 // Canonicalize toward having the constant on the RHS.
4683 // TODO: Handle non-splat vector constants. All undef causes trouble.
4684 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4685 // infinite loop here when we encounter one.
4687 if (N0ConstOrSplat && !N1ConstOrSplat &&
4688 (DCI.isBeforeLegalizeOps() ||
4689 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4690 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4691
4692 // If we have a subtract with the same 2 non-constant operands as this setcc
4693 // -- but in reverse order -- then try to commute the operands of this setcc
4694 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4695 // instruction on some targets.
4696 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4697 (DCI.isBeforeLegalizeOps() ||
4698 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4699 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4700 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4701 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4702
4703 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4704 return V;
4705
4706 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4707 return V;
4708
4709 if (auto *N1C = isConstOrConstSplat(N1)) {
4710 const APInt &C1 = N1C->getAPIntValue();
4711
4712 // Optimize some CTPOP cases.
4713 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4714 return V;
4715
4716 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4717 // X * Y == 0 --> (X == 0) || (Y == 0)
4718 // X * Y != 0 --> (X != 0) && (Y != 0)
4719 // TODO: This bails out if minsize is set, but if the target doesn't have a
4720 // single instruction multiply for this type, it would likely be
4721 // smaller to decompose.
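// (The no-wrap requirement matters: with a wrapping multiply the product can
// be zero with both operands non-zero, e.g. i8 16 * 16 wraps to 0.)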
4722 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4723 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4724 (N0->getFlags().hasNoUnsignedWrap() ||
4725 N0->getFlags().hasNoSignedWrap()) &&
4726 !Attr.hasFnAttr(Attribute::MinSize)) {
4727 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4728 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4729 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4730 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4731 }
4732
4733 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4734 // equality comparison, then we're just comparing whether X itself is
4735 // zero.
4736 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4737 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4738 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4739 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4740 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4741 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4742 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4743 // (srl (ctlz x), 5) == 0 -> X != 0
4744 // (srl (ctlz x), 5) != 1 -> X != 0
4745 Cond = ISD::SETNE;
4746 } else {
4747 // (srl (ctlz x), 5) != 0 -> X == 0
4748 // (srl (ctlz x), 5) == 1 -> X == 0
4749 Cond = ISD::SETEQ;
4750 }
4751 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4752 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4753 Cond);
4754 }
4755 }
4756 }
4757 }
4758
4759 // FIXME: Support vectors.
4760 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4761 const APInt &C1 = N1C->getAPIntValue();
4762
4763 // (zext x) == C --> x == (trunc C)
4764 // (sext x) == C --> x == (trunc C)
4765 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4766 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4767 unsigned MinBits = N0.getValueSizeInBits();
4768 SDValue PreExt;
4769 bool Signed = false;
4770 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4771 // ZExt
4772 MinBits = N0->getOperand(0).getValueSizeInBits();
4773 PreExt = N0->getOperand(0);
4774 } else if (N0->getOpcode() == ISD::AND) {
4775 // DAGCombine turns costly ZExts into ANDs
4776 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4777 if ((C->getAPIntValue()+1).isPowerOf2()) {
4778 MinBits = C->getAPIntValue().countr_one();
4779 PreExt = N0->getOperand(0);
4780 }
4781 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4782 // SExt
4783 MinBits = N0->getOperand(0).getValueSizeInBits();
4784 PreExt = N0->getOperand(0);
4785 Signed = true;
4786 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4787 // ZEXTLOAD / SEXTLOAD
4788 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4789 MinBits = LN0->getMemoryVT().getSizeInBits();
4790 PreExt = N0;
4791 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4792 Signed = true;
4793 MinBits = LN0->getMemoryVT().getSizeInBits();
4794 PreExt = N0;
4795 }
4796 }
4797
4798 // Figure out how many bits we need to preserve this constant.
4799 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4800
4801 // Make sure we're not losing bits from the constant.
4802 if (MinBits > 0 &&
4803 MinBits < C1.getBitWidth() &&
4804 MinBits >= ReqdBits) {
4805 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4806 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4807 // Will get folded away.
4808 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4809 if (MinBits == 1 && C1 == 1)
4810 // Invert the condition.
4811 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4812 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4813 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4814 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4815 }
4816
4817 // If truncating the setcc operands is not desirable, we can still
4818 // simplify the expression in some cases:
4819 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4820 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4821 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4822 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4823 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4824 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4825 SDValue TopSetCC = N0->getOperand(0);
4826 unsigned N0Opc = N0->getOpcode();
4827 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4828 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4829 TopSetCC.getOpcode() == ISD::SETCC &&
4830 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4831 (isConstFalseVal(N1) ||
4832 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4833
4834 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4835 (!N1C->isZero() && Cond == ISD::SETNE);
4836
4837 if (!Inverse)
4838 return TopSetCC;
4839
4841 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4842 TopSetCC.getOperand(0).getValueType());
4843 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4844 TopSetCC.getOperand(1),
4845 InvCond);
4846 }
4847 }
4848 }
4849
4850 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4851 // equality or unsigned, and all 1 bits of the const are in the same
4852 // partial word, see if we can shorten the load.
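// For example, on a little-endian target:
//   ((i32 (load p)) & 0x00FF0000) == 0  -->  (i8 (load p+2)) == 0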
4853 if (DCI.isBeforeLegalize() &&
4854 !ISD::isSignedIntSetCC(Cond) &&
4855 N0.getOpcode() == ISD::AND && C1 == 0 &&
4856 N0.getNode()->hasOneUse() &&
4857 isa<LoadSDNode>(N0.getOperand(0)) &&
4858 N0.getOperand(0).getNode()->hasOneUse() &&
4859 isa<ConstantSDNode>(N0.getOperand(1))) {
4860 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4861 APInt bestMask;
4862 unsigned bestWidth = 0, bestOffset = 0;
4863 if (Lod->isSimple() && Lod->isUnindexed() &&
4864 (Lod->getMemoryVT().isByteSized() ||
4865 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4866 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4867 unsigned origWidth = N0.getValueSizeInBits();
4868 unsigned maskWidth = origWidth;
4869 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4870 // 8 bits, but have to be careful...
4871 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4872 origWidth = Lod->getMemoryVT().getSizeInBits();
4873 const APInt &Mask = N0.getConstantOperandAPInt(1);
4874 // Only consider power-of-2 widths (and at least one byte) as candidates
4875 // for the narrowed load.
4876 for (unsigned width = 8; width < origWidth; width *= 2) {
4877 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4878 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4879 // Avoid accessing any padding here for now (we could use memWidth
4880 // instead of origWidth here otherwise).
4881 unsigned maxOffset = origWidth - width;
4882 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4883 if (Mask.isSubsetOf(newMask)) {
4884 unsigned ptrOffset =
4885 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4886 unsigned IsFast = 0;
4887 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4888 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4889 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
4890 ptrOffset / 8) &&
4891 allowsMemoryAccess(
4892 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4893 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4894 IsFast) {
4895 bestOffset = ptrOffset / 8;
4896 bestMask = Mask.lshr(offset);
4897 bestWidth = width;
4898 break;
4899 }
4900 }
4901 newMask <<= 8;
4902 }
4903 if (bestWidth)
4904 break;
4905 }
4906 }
4907 if (bestWidth) {
4908 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4909 SDValue Ptr = Lod->getBasePtr();
4910 if (bestOffset != 0)
4911 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4912 SDValue NewLoad =
4913 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4914 Lod->getPointerInfo().getWithOffset(bestOffset),
4915 Lod->getBaseAlign());
4916 SDValue And =
4917 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4918 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4919 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4920 }
4921 }
4922
4923 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4924 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4925 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4926
4927 // If the comparison constant has bits in the upper part, the
4928 // zero-extended value could never match.
4929 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4930 C1.getBitWidth() - InSize))) {
4931 switch (Cond) {
4932 case ISD::SETUGT:
4933 case ISD::SETUGE:
4934 case ISD::SETEQ:
4935 return DAG.getConstant(0, dl, VT);
4936 case ISD::SETULT:
4937 case ISD::SETULE:
4938 case ISD::SETNE:
4939 return DAG.getConstant(1, dl, VT);
4940 case ISD::SETGT:
4941 case ISD::SETGE:
4942 // True if the sign bit of C1 is set.
4943 return DAG.getConstant(C1.isNegative(), dl, VT);
4944 case ISD::SETLT:
4945 case ISD::SETLE:
4946 // True if the sign bit of C1 isn't set.
4947 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4948 default:
4949 break;
4950 }
4951 }
4952
4953 // Otherwise, we can perform the comparison with the low bits.
4954 switch (Cond) {
4955 case ISD::SETEQ:
4956 case ISD::SETNE:
4957 case ISD::SETUGT:
4958 case ISD::SETUGE:
4959 case ISD::SETULT:
4960 case ISD::SETULE: {
4961 EVT newVT = N0.getOperand(0).getValueType();
4962 // FIXME: Should use isNarrowingProfitable.
4963 if (DCI.isBeforeLegalizeOps() ||
4964 (isOperationLegal(ISD::SETCC, newVT) &&
4965 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4966 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4967 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4968 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4969
4970 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4971 NewConst, Cond);
4972 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4973 }
4974 break;
4975 }
4976 default:
4977 break; // TODO: Be more careful with signed comparisons.
4978 }
4979 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4980 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4981 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4982 OpVT)) {
4983 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4984 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4985 EVT ExtDstTy = N0.getValueType();
4986 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4987
4988 // If the constant doesn't fit into the number of bits for the source of
4989 // the sign extension, it is impossible for both sides to be equal.
4990 if (C1.getSignificantBits() > ExtSrcTyBits)
4991 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4992
4993 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4994 ExtDstTy != ExtSrcTy && "Unexpected types!");
4995 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4996 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4997 DAG.getConstant(Imm, dl, ExtDstTy));
4998 if (!DCI.isCalledByLegalizer())
4999 DCI.AddToWorklist(ZextOp.getNode());
5000 // Otherwise, make this a use of a zext.
5001 return DAG.getSetCC(dl, VT, ZextOp,
5002 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5003 } else if ((N1C->isZero() || N1C->isOne()) &&
5004 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5005 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5006 // excluded as they are handled below whilst checking for foldBooleans.
5007 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5008 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5009 (N0.getValueType() == MVT::i1 ||
5010 getBooleanContents(N0.getValueType()) ==
5011 ZeroOrOneBooleanContent)) {
5012 
5013 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5014 if (TrueWhenTrue)
5015 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5016 // Invert the condition.
5017 if (N0.getOpcode() == ISD::SETCC) {
5018 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5019 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
5020 if (DCI.isBeforeLegalizeOps() ||
5021 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5022 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5023 }
5024 }
5025
5026 if ((N0.getOpcode() == ISD::XOR ||
5027 (N0.getOpcode() == ISD::AND &&
5028 N0.getOperand(0).getOpcode() == ISD::XOR &&
5029 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5030 isOneConstant(N0.getOperand(1))) {
5031 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5032 // can only do this if the top bits are known zero.
5033 unsigned BitWidth = N0.getValueSizeInBits();
5034 if (DAG.MaskedValueIsZero(N0,
5035 APInt::getHighBitsSet(BitWidth,
5036 BitWidth-1))) {
5037 // Okay, get the un-inverted input value.
5038 SDValue Val;
5039 if (N0.getOpcode() == ISD::XOR) {
5040 Val = N0.getOperand(0);
5041 } else {
5042 assert(N0.getOpcode() == ISD::AND &&
5043 N0.getOperand(0).getOpcode() == ISD::XOR);
5044 // ((X^1)&1)^1 -> X & 1
5045 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5046 N0.getOperand(0).getOperand(0),
5047 N0.getOperand(1));
5048 }
5049
5050 return DAG.getSetCC(dl, VT, Val, N1,
5051 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5052 }
5053 } else if (N1C->isOne()) {
5054 SDValue Op0 = N0;
5055 if (Op0.getOpcode() == ISD::TRUNCATE)
5056 Op0 = Op0.getOperand(0);
5057
5058 if ((Op0.getOpcode() == ISD::XOR) &&
5059 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5060 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5061 SDValue XorLHS = Op0.getOperand(0);
5062 SDValue XorRHS = Op0.getOperand(1);
5063 // Ensure that the input setccs return an i1 type or 0/1 value.
5064 if (Op0.getValueType() == MVT::i1 ||
5065 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
5066 ZeroOrOneBooleanContent &&
5067 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
5068 ZeroOrOneBooleanContent)) {
5069 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5070 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5071 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5072 }
5073 }
5074 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5075 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5076 if (Op0.getValueType().bitsGT(VT))
5077 Op0 = DAG.getNode(ISD::AND, dl, VT,
5078 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5079 DAG.getConstant(1, dl, VT));
5080 else if (Op0.getValueType().bitsLT(VT))
5081 Op0 = DAG.getNode(ISD::AND, dl, VT,
5082 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5083 DAG.getConstant(1, dl, VT));
5084
5085 return DAG.getSetCC(dl, VT, Op0,
5086 DAG.getConstant(0, dl, Op0.getValueType()),
5087 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5088 }
5089 if (Op0.getOpcode() == ISD::AssertZext &&
5090 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5091 return DAG.getSetCC(dl, VT, Op0,
5092 DAG.getConstant(0, dl, Op0.getValueType()),
5093 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5094 }
5095 }
5096
5097 // Given:
5098 // icmp eq/ne (urem %x, %y), 0
5099 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5100 // icmp eq/ne %x, 0
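// (%x with at most one bit set is 0 or a power of 2, and every divisor of a
// power of 2 is itself a single-bit value, so a %y with two or more bits set
// divides %x evenly only when %x is 0.)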
5101 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5102 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5103 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5104 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5105 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5106 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5107 }
5108
5109 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5110 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5111 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5112 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
5113 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5114 N1C->isAllOnes()) {
5115 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5116 DAG.getConstant(0, dl, OpVT),
5117 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5118 }
5119
5120 // fold (setcc (trunc x) c) -> (setcc x c)
5121 if (N0.getOpcode() == ISD::TRUNCATE &&
5122 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5123 (N0->getFlags().hasNoSignedWrap() &&
5124 !ISD::isUnsignedIntSetCC(Cond))) &&
5125 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5126 EVT NewVT = N0.getOperand(0).getValueType();
5127 SDValue NewConst = DAG.getConstant(
5128 ISD::isSignedIntSetCC(Cond)
5129 ? C1.sext(NewVT.getSizeInBits())
5130 : C1.zext(NewVT.getSizeInBits()),
5131 dl, NewVT);
5132 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5133 }
5134
5135 if (SDValue V =
5136 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5137 return V;
5138 }
5139
5140 // These simplifications apply to splat vectors as well.
5141 // TODO: Handle more splat vector cases.
5142 if (auto *N1C = isConstOrConstSplat(N1)) {
5143 const APInt &C1 = N1C->getAPIntValue();
5144
5145 APInt MinVal, MaxVal;
5146 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5147 if (ISD::isSignedIntSetCC(Cond)) {
5148 MinVal = APInt::getSignedMinValue(OperandBitSize);
5149 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5150 } else {
5151 MinVal = APInt::getMinValue(OperandBitSize);
5152 MaxVal = APInt::getMaxValue(OperandBitSize);
5153 }
5154
5155 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5156 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5157 // X >= MIN --> true
5158 if (C1 == MinVal)
5159 return DAG.getBoolConstant(true, dl, VT, OpVT);
5160
5161 if (!VT.isVector()) { // TODO: Support this for vectors.
5162 // X >= C0 --> X > (C0 - 1)
5163 APInt C = C1 - 1;
5164 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5165 if ((DCI.isBeforeLegalizeOps() ||
5166 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5167 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5168 isLegalICmpImmediate(C.getSExtValue())))) {
5169 return DAG.getSetCC(dl, VT, N0,
5170 DAG.getConstant(C, dl, N1.getValueType()),
5171 NewCC);
5172 }
5173 }
5174 }
5175
5176 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5177 // X <= MAX --> true
5178 if (C1 == MaxVal)
5179 return DAG.getBoolConstant(true, dl, VT, OpVT);
5180
5181 // X <= C0 --> X < (C0 + 1)
5182 if (!VT.isVector()) { // TODO: Support this for vectors.
5183 APInt C = C1 + 1;
5184 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5185 if ((DCI.isBeforeLegalizeOps() ||
5186 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5187 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5188 isLegalICmpImmediate(C.getSExtValue())))) {
5189 return DAG.getSetCC(dl, VT, N0,
5190 DAG.getConstant(C, dl, N1.getValueType()),
5191 NewCC);
5192 }
5193 }
5194 }
5195
5196 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5197 if (C1 == MinVal)
5198 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5199
5200 // TODO: Support this for vectors after legalize ops.
5201 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5202 // Canonicalize setlt X, Max --> setne X, Max
5203 if (C1 == MaxVal)
5204 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5205
5206 // If we have setult X, 1, turn it into seteq X, 0
5207 if (C1 == MinVal+1)
5208 return DAG.getSetCC(dl, VT, N0,
5209 DAG.getConstant(MinVal, dl, N0.getValueType()),
5210 ISD::SETEQ);
5211 }
5212 }
5213
5214 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5215 if (C1 == MaxVal)
5216 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5217
5218 // TODO: Support this for vectors after legalize ops.
5219 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5220 // Canonicalize setgt X, Min --> setne X, Min
5221 if (C1 == MinVal)
5222 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5223
5224 // If we have setugt X, Max-1, turn it into seteq X, Max
5225 if (C1 == MaxVal-1)
5226 return DAG.getSetCC(dl, VT, N0,
5227 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5228 ISD::SETEQ);
5229 }
5230 }
5231
5232 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5233 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5234 if (C1.isZero())
5235 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5236 VT, N0, N1, Cond, DCI, dl))
5237 return CC;
5238
5239 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5240 // For example, when high 32-bits of i64 X are known clear:
5241 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5242 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5243 bool CmpZero = N1C->isZero();
5244 bool CmpNegOne = N1C->isAllOnes();
5245 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5246 // Match or(lo,shl(hi,bw/2)) pattern.
5247 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5248 unsigned EltBits = V.getScalarValueSizeInBits();
5249 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5250 return false;
5251 SDValue LHS = V.getOperand(0);
5252 SDValue RHS = V.getOperand(1);
5253 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5254 // Unshifted element must have zero upper bits.
5255 if (RHS.getOpcode() == ISD::SHL &&
5256 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5257 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5258 DAG.MaskedValueIsZero(LHS, HiBits)) {
5259 Lo = LHS;
5260 Hi = RHS.getOperand(0);
5261 return true;
5262 }
5263 if (LHS.getOpcode() == ISD::SHL &&
5264 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5265 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5266 DAG.MaskedValueIsZero(RHS, HiBits)) {
5267 Lo = RHS;
5268 Hi = LHS.getOperand(0);
5269 return true;
5270 }
5271 return false;
5272 };
5273
5274 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5275 unsigned EltBits = N0.getScalarValueSizeInBits();
5276 unsigned HalfBits = EltBits / 2;
5277 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5278 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5279 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5280 SDValue NewN0 =
5281 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5282 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5283 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5284 };
5285
5286 SDValue Lo, Hi;
5287 if (IsConcat(N0, Lo, Hi))
5288 return MergeConcat(Lo, Hi);
5289
5290 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5291 SDValue Lo0, Lo1, Hi0, Hi1;
5292 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5293 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5294 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5295 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5296 }
5297 }
5298 }
5299 }
5300
5301 // If we have "setcc X, C0", check to see if we can shrink the immediate
5302 // by changing cc.
5303 // TODO: Support this for vectors after legalize ops.
5304 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5305 // SETUGT X, SINTMAX -> SETLT X, 0
5306 // SETUGE X, SINTMIN -> SETLT X, 0
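// (X u> SINTMAX holds exactly when the sign bit of X is set, i.e. X s< 0.)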
5307 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5308 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5309 return DAG.getSetCC(dl, VT, N0,
5310 DAG.getConstant(0, dl, N1.getValueType()),
5311 ISD::SETLT);
5312
5313 // SETULT X, SINTMIN -> SETGT X, -1
5314 // SETULE X, SINTMAX -> SETGT X, -1
5315 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5316 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5317 return DAG.getSetCC(dl, VT, N0,
5318 DAG.getAllOnesConstant(dl, N1.getValueType()),
5319 ISD::SETGT);
5320 }
5321 }
5322
5323 // Back to non-vector simplifications.
5324 // TODO: Can we do these for vector splats?
5325 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5326 const APInt &C1 = N1C->getAPIntValue();
5327 EVT ShValTy = N0.getValueType();
5328
5329 // Fold bit comparisons when we can. This will result in an
5330 // incorrect value when boolean false is negative one, unless
5331 // the bitsize is 1 in which case the false value is the same
5332 // in practice regardless of the representation.
5333 if ((VT.getSizeInBits() == 1 ||
5334 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5335 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5336 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5337 N0.getOpcode() == ISD::AND) {
5338 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5339 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5340 // Perform the xform if the AND RHS is a single bit.
5341 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5342 if (AndRHS->getAPIntValue().isPowerOf2() &&
5343 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5344 return DAG.getNode(
5345 ISD::TRUNCATE, dl, VT,
5346 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5347 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5348 }
5349 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5350 // (X & 8) == 8 --> (X & 8) >> 3
5351 // Perform the xform if C1 is a single bit.
5352 unsigned ShCt = C1.logBase2();
5353 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5354 return DAG.getNode(
5355 ISD::TRUNCATE, dl, VT,
5356 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5357 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5358 }
5359 }
5360 }
5361 }
5362
5363 if (C1.getSignificantBits() <= 64 &&
5364 !isLegalICmpImmediate(C1.getSExtValue())) {
5365 // (X & -256) == 256 -> (X >> 8) == 1
5366 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5367 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5368 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5369 const APInt &AndRHSC = AndRHS->getAPIntValue();
5370 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5371 unsigned ShiftBits = AndRHSC.countr_zero();
5372 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5373 // If using an unsigned shift doesn't yield a legal compare
5374 // immediate, try using sra instead.
5375 APInt NewC = C1.lshr(ShiftBits);
5376 if (NewC.getSignificantBits() <= 64 &&
5377 !isLegalICmpImmediate(NewC.getSExtValue())) {
5378 APInt SignedC = C1.ashr(ShiftBits);
5379 if (SignedC.getSignificantBits() <= 64 &&
5380 isLegalICmpImmediate(SignedC.getSExtValue())) {
5381 SDValue Shift = DAG.getNode(
5382 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5383 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5384 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5385 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5386 }
5387 }
5388 SDValue Shift = DAG.getNode(
5389 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5390 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5391 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5392 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5393 }
5394 }
5395 }
5396 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5397 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5398 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5399 // X < 0x100000000 -> (X >> 32) < 1
5400 // X >= 0x100000000 -> (X >> 32) >= 1
5401 // X <= 0x0ffffffff -> (X >> 32) < 1
5402 // X > 0x0ffffffff -> (X >> 32) >= 1
5403 unsigned ShiftBits;
5404 APInt NewC = C1;
5405 ISD::CondCode NewCond = Cond;
5406 if (AdjOne) {
5407 ShiftBits = C1.countr_one();
5408 NewC = NewC + 1;
5409 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5410 } else {
5411 ShiftBits = C1.countr_zero();
5412 }
5413 NewC.lshrInPlace(ShiftBits);
5414 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5415 isLegalICmpImmediate(NewC.getSExtValue()) &&
5416 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5417 SDValue Shift =
5418 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5419 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5420 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5421 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5422 }
5423 }
5424 }
5425 }
5426
5427 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5428 auto *CFP = cast<ConstantFPSDNode>(N1);
5429 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5430
5431 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5432 // constant if knowing that the operand is non-NaN is enough. We prefer to
5433 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5434 // materialize 0.0.
5435 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5436 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5437
5438 // setcc (fneg x), C -> setcc swap(pred) x, -C
5439 if (N0.getOpcode() == ISD::FNEG) {
5440 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5441 if (DCI.isBeforeLegalizeOps() ||
5442 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5443 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5444 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5445 }
5446 }
5447
5448 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5449 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, OpVT) &&
5450 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5451 bool IsFabs = N0.getOpcode() == ISD::FABS;
5452 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5453 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5454 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5455 : (IsFabs ? fcInf : fcPosInf);
5456 if (Cond == ISD::SETUEQ)
5457 Flag |= fcNan;
5458 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5459 DAG.getTargetConstant(Flag, dl, MVT::i32));
5460 }
5461 }
5462
5463 // If the condition is not legal, see if we can find an equivalent one
5464 // which is legal.
5465 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5466 // If the comparison was an awkward floating-point == or != and one of
5467 // the comparison operands is infinity or negative infinity, convert the
5468 // condition to a less-awkward <= or >=.
5469 if (CFP->getValueAPF().isInfinity()) {
5470 bool IsNegInf = CFP->getValueAPF().isNegative();
5471 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5472 switch (Cond) {
5473 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5474 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5475 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5476 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5477 default: break;
5478 }
5479 if (NewCond != ISD::SETCC_INVALID &&
5480 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5481 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5482 }
5483 }
5484 }
5485
5486 if (N0 == N1) {
5487 // The sext(setcc()) => setcc() optimization relies on the appropriate
5488 // constant being emitted.
5489 assert(!N0.getValueType().isInteger() &&
5490 "Integer types should be handled by FoldSetCC");
5491
5492 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5493 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5494 if (UOF == 2) // FP operators that are undefined on NaNs.
5495 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5496 if (UOF == unsigned(EqTrue))
5497 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5498 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5499 // if it is not already.
5500 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5501 if (NewCond != Cond &&
5502 (DCI.isBeforeLegalizeOps() ||
5503 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5504 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5505 }
5506
5507 // ~X > ~Y --> Y > X
5508 // ~X < ~Y --> Y < X
5509 // ~X < C --> X > ~C
5510 // ~X > C --> X < ~C
5511 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5512 N0.getValueType().isInteger()) {
5513 if (isBitwiseNot(N0)) {
5514 if (isBitwiseNot(N1))
5515 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5516
5517 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5518 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5519 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5520 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5521 }
5522 }
5523 }
5524
5525 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5526 N0.getValueType().isInteger()) {
5527 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5528 N0.getOpcode() == ISD::XOR) {
5529 // Simplify (X+Y) == (X+Z) --> Y == Z
5530 if (N0.getOpcode() == N1.getOpcode()) {
5531 if (N0.getOperand(0) == N1.getOperand(0))
5532 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5533 if (N0.getOperand(1) == N1.getOperand(1))
5534 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5535 if (isCommutativeBinOp(N0.getOpcode())) {
5536 // If X op Y == Y op X, try other combinations.
5537 if (N0.getOperand(0) == N1.getOperand(1))
5538 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5539 Cond);
5540 if (N0.getOperand(1) == N1.getOperand(0))
5541 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5542 Cond);
5543 }
5544 }
5545
5546 // If RHS is a legal immediate value for a compare instruction, we need
5547 // to be careful about increasing register pressure needlessly.
5548 bool LegalRHSImm = false;
5549
5550 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5551 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5552 // Turn (X+C1) == C2 --> X == C2-C1
5553 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5554 return DAG.getSetCC(
5555 dl, VT, N0.getOperand(0),
5556 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5557 dl, N0.getValueType()),
5558 Cond);
5559
5560 // Turn (X^C1) == C2 --> X == C1^C2
5561 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5562 return DAG.getSetCC(
5563 dl, VT, N0.getOperand(0),
5564 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5565 dl, N0.getValueType()),
5566 Cond);
5567 }
5568
5569 // Turn (C1-X) == C2 --> X == C1-C2
5570 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5571 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5572 return DAG.getSetCC(
5573 dl, VT, N0.getOperand(1),
5574 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5575 dl, N0.getValueType()),
5576 Cond);
5577
5578 // Could RHSC fold directly into a compare?
5579 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5580 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5581 }
5582
5583 // (X+Y) == X --> Y == 0 and similar folds.
5584 // Don't do this if X is an immediate that can fold into a cmp
5585 // instruction and X+Y has other uses. It could be an induction variable
5586 // chain, and the transform would increase register pressure.
5587 if (!LegalRHSImm || N0.hasOneUse())
5588 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5589 return V;
5590 }
5591
5592 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5593 N1.getOpcode() == ISD::XOR)
5594 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5595 return V;
5596
5597 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5598 return V;
5599
5600 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5601 return V;
5602 }
5603
5604 // Fold remainder of division by a constant.
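// The folds below test divisibility without a division; e.g. for i32,
// (X urem 5) == 0 <=> (X * 0xCCCCCCCD) u<= 0x33333333, where 0xCCCCCCCD is
// the multiplicative inverse of 5 modulo 2^32 and 0x33333333 == (2^32-1)/5.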
5605 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5606 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5607 // When division is cheap or optimizing for minimum size,
5608 // fall through to DIVREM creation by skipping this fold.
5609 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5610 if (N0.getOpcode() == ISD::UREM) {
5611 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5612 return Folded;
5613 } else if (N0.getOpcode() == ISD::SREM) {
5614 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5615 return Folded;
5616 }
5617 }
5618 }
5619
5620 // Fold away ALL boolean setcc's.
5621 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5622 SDValue Temp;
5623 switch (Cond) {
5624 default: llvm_unreachable("Unknown integer setcc!");
5625 case ISD::SETEQ: // X == Y -> ~(X^Y)
5626 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5627 N0 = DAG.getNOT(dl, Temp, OpVT);
5628 if (!DCI.isCalledByLegalizer())
5629 DCI.AddToWorklist(Temp.getNode());
5630 break;
5631 case ISD::SETNE: // X != Y --> (X^Y)
5632 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5633 break;
5634 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5635 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5636 Temp = DAG.getNOT(dl, N0, OpVT);
5637 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5638 if (!DCI.isCalledByLegalizer())
5639 DCI.AddToWorklist(Temp.getNode());
5640 break;
5641 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5642 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5643 Temp = DAG.getNOT(dl, N1, OpVT);
5644 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5645 if (!DCI.isCalledByLegalizer())
5646 DCI.AddToWorklist(Temp.getNode());
5647 break;
5648 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5649 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5650 Temp = DAG.getNOT(dl, N0, OpVT);
5651 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5652 if (!DCI.isCalledByLegalizer())
5653 DCI.AddToWorklist(Temp.getNode());
5654 break;
5655 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5656 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5657 Temp = DAG.getNOT(dl, N1, OpVT);
5658 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5659 break;
5660 }
5661 if (VT.getScalarType() != MVT::i1) {
5662 if (!DCI.isCalledByLegalizer())
5663 DCI.AddToWorklist(N0.getNode());
5664 // FIXME: If running after legalize, we probably can't do this.
5665 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5666 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5667 }
5668 return N0;
5669 }
5670
5671 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5672 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5673 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5674 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5675 N1->getFlags().hasNoUnsignedWrap()) ||
5676 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5677 N1->getFlags().hasNoSignedWrap())) &&
5678 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5679 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5680 }
5681
5682 // Could not fold it.
5683 return SDValue();
5684}
5685
5686/// Returns true (and the GlobalValue and the offset) if the node is a
5687/// GlobalAddress + offset.
5688 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5689 int64_t &Offset) const {
5690
5691 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5692
5693 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5694 GA = GASD->getGlobal();
5695 Offset += GASD->getOffset();
5696 return true;
5697 }
5698
5699 if (N->getOpcode() == ISD::ADD) {
5700 SDValue N1 = N->getOperand(0);
5701 SDValue N2 = N->getOperand(1);
5702 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5703 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5704 Offset += V->getSExtValue();
5705 return true;
5706 }
5707 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5708 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5709 Offset += V->getSExtValue();
5710 return true;
5711 }
5712 }
5713 }
5714
5715 return false;
5716}
5717
5718 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5719 DAGCombinerInfo &DCI) const {
5720 // Default implementation: no optimization.
5721 return SDValue();
5722}
5723
5724//===----------------------------------------------------------------------===//
5725// Inline Assembler Implementation Methods
5726//===----------------------------------------------------------------------===//
5727
5728 TargetLowering::ConstraintType
5729 TargetLowering::getConstraintType(StringRef Constraint) const {
5730 unsigned S = Constraint.size();
5731
5732 if (S == 1) {
5733 switch (Constraint[0]) {
5734 default: break;
5735 case 'r':
5736 return C_RegisterClass;
5737 case 'm': // memory
5738 case 'o': // offsetable
5739 case 'V': // not offsetable
5740 return C_Memory;
5741 case 'p': // Address.
5742 return C_Address;
5743 case 'n': // Simple Integer
5744 case 'E': // Floating Point Constant
5745 case 'F': // Floating Point Constant
5746 return C_Immediate;
5747 case 'i': // Simple Integer or Relocatable Constant
5748 case 's': // Relocatable Constant
5749 case 'X': // Allow ANY value.
5750 case 'I': // Target registers.
5751 case 'J':
5752 case 'K':
5753 case 'L':
5754 case 'M':
5755 case 'N':
5756 case 'O':
5757 case 'P':
5758 case '<':
5759 case '>':
5760 return C_Other;
5761 }
5762 }
5763
5764 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5765 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5766 return C_Memory;
5767 return C_Register;
5768 }
5769 return C_Unknown;
5770}
5771
5772/// Try to replace an X constraint, which matches anything, with another that
5773/// has more specific requirements based on the type of the corresponding
5774/// operand.
5775const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5776 if (ConstraintVT.isInteger())
5777 return "r";
5778 if (ConstraintVT.isFloatingPoint())
5779 return "f"; // works for many targets
5780 return nullptr;
5781}
5782
5783 SDValue TargetLowering::LowerAsmOutputForConstraint(
5784 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5785 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5786 return SDValue();
5787}
5788
5789/// Lower the specified operand into the Ops vector.
5790/// If it is invalid, don't add anything to Ops.
5791 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5792 StringRef Constraint,
5793 std::vector<SDValue> &Ops,
5794 SelectionDAG &DAG) const {
5795
5796 if (Constraint.size() > 1)
5797 return;
5798
5799 char ConstraintLetter = Constraint[0];
5800 switch (ConstraintLetter) {
5801 default: break;
5802 case 'X': // Allows any operand
5803 case 'i': // Simple Integer or Relocatable Constant
5804 case 'n': // Simple Integer
5805 case 's': { // Relocatable Constant
5806
5807 ConstantSDNode *C;
5808 uint64_t Offset = 0;
5809
5810 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5811 // etc., since getelementptr is variadic. We can't use
5812 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5813 // while in this case the GA may be furthest from the root node which is
5814 // likely an ISD::ADD.
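// For example, an operand of the form ((@g + 4) + 8) is folded below into a
// single TargetGlobalAddress of @g with Offset == 12.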
5815 while (true) {
5816 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5817 // gcc prints these as sign extended. Sign extend value to 64 bits
5818 // now; without this it would get ZExt'd later in
5819 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5820 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5821 BooleanContent BCont = getBooleanContents(MVT::i64);
5822 ISD::NodeType ExtOpc =
5823 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5824 int64_t ExtVal =
5825 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5826 Ops.push_back(
5827 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5828 return;
5829 }
5830 if (ConstraintLetter != 'n') {
5831 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5832 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5833 GA->getValueType(0),
5834 Offset + GA->getOffset()));
5835 return;
5836 }
5837 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5838 Ops.push_back(DAG.getTargetBlockAddress(
5839 BA->getBlockAddress(), BA->getValueType(0),
5840 Offset + BA->getOffset(), BA->getTargetFlags()));
5841 return;
5842 }
5843 if (isa<BasicBlockSDNode>(Op)) {
5844 Ops.push_back(Op);
5845 return;
5846 }
5847 }
5848 const unsigned OpCode = Op.getOpcode();
5849 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5850 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5851 Op = Op.getOperand(1);
5852 // Subtraction is not commutative.
5853 else if (OpCode == ISD::ADD &&
5854 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5855 Op = Op.getOperand(0);
5856 else
5857 return;
5858 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5859 continue;
5860 }
5861 return;
5862 }
5863 break;
5864 }
5865 }
5866}
5867
5868 void TargetLowering::CollectTargetIntrinsicOperands(
5869 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5870}
5871
5872std::pair<unsigned, const TargetRegisterClass *>
5873 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5874 StringRef Constraint,
5875 MVT VT) const {
5876 if (!Constraint.starts_with("{"))
5877 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5878 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5879
5880 // Remove the braces from around the name.
5881 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
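// For example, the constraint "{eax}" yields RegName == "eax".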
5882
5883 std::pair<unsigned, const TargetRegisterClass *> R =
5884 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5885
5886 // Figure out which register class contains this reg.
5887 for (const TargetRegisterClass *RC : RI->regclasses()) {
5888 // If none of the value types for this register class are valid, we
5889 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5890 if (!isLegalRC(*RI, *RC))
5891 continue;
5892
5893 for (const MCPhysReg &PR : *RC) {
5894 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5895 std::pair<unsigned, const TargetRegisterClass *> S =
5896 std::make_pair(PR, RC);
5897
5898 // If this register class has the requested value type, return it,
5899 // otherwise keep searching and return the first class found
5900 // if no other is found which explicitly has the requested type.
5901 if (RI->isTypeLegalForClass(*RC, VT))
5902 return S;
5903 if (!R.second)
5904 R = S;
5905 }
5906 }
5907 }
5908
5909 return R;
5910}
5911
5912//===----------------------------------------------------------------------===//
5913// Constraint Selection.
5914
5915 /// Return true if this is an input operand that is a matching constraint like
5916/// "4".
5917 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5918 assert(!ConstraintCode.empty() && "No known constraint!");
5919 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5920}
5921
5922/// If this is an input matching constraint, this method returns the output
5923/// operand it matches.
5924 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5925 assert(!ConstraintCode.empty() && "No known constraint!");
5926 return atoi(ConstraintCode.c_str());
5927}
5928
5929/// Split up the constraint string from the inline assembly value into the
5930/// specific constraints and their prefixes, and also tie in the associated
5931/// operand values.
5932/// If this returns an empty vector, and if the constraint string itself
5933/// isn't empty, there was an error parsing.
5934 TargetLowering::AsmOperandInfoVector
5935 TargetLowering::ParseConstraints(const DataLayout &DL,
5936 const TargetRegisterInfo *TRI,
5937 const CallBase &Call) const {
5938 /// Information about all of the constraints.
5939 AsmOperandInfoVector ConstraintOperands;
5940 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5941 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5942
5943 // Do a prepass over the constraints, canonicalizing them, and building up the
5944 // ConstraintOperands list.
5945 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5946 unsigned ResNo = 0; // ResNo - The result number of the next output.
5947 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5948
5949 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5950 ConstraintOperands.emplace_back(std::move(CI));
5951 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5952
5953 // Update multiple alternative constraint count.
5954 if (OpInfo.multipleAlternatives.size() > maCount)
5955 maCount = OpInfo.multipleAlternatives.size();
5956
5957 OpInfo.ConstraintVT = MVT::Other;
5958
5959 // Compute the value type for each operand.
5960 switch (OpInfo.Type) {
5961 case InlineAsm::isOutput:
5962 // Indirect outputs just consume an argument.
5963 if (OpInfo.isIndirect) {
5964 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5965 break;
5966 }
5967
5968 // The return value of the call is this value. As such, there is no
5969 // corresponding argument.
5970 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5971 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5972 OpInfo.ConstraintVT =
5973 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5974 .getSimpleVT();
5975 } else {
5976 assert(ResNo == 0 && "Asm only has one result!");
5977 OpInfo.ConstraintVT =
5978 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5979 }
5980 ++ResNo;
5981 break;
5982 case InlineAsm::isInput:
5983 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5984 break;
5985 case InlineAsm::isLabel:
5986 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5987 ++LabelNo;
5988 continue;
5989 case InlineAsm::isClobber:
5990 // Nothing to do.
5991 break;
5992 }
5993
5994 if (OpInfo.CallOperandVal) {
5995 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5996 if (OpInfo.isIndirect) {
5997 OpTy = Call.getParamElementType(ArgNo);
5998 assert(OpTy && "Indirect operand must have elementtype attribute");
5999 }
6000
6001 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6002 if (StructType *STy = dyn_cast<StructType>(OpTy))
6003 if (STy->getNumElements() == 1)
6004 OpTy = STy->getElementType(0);
6005
6006 // If OpTy is not a single value, it may be a struct/union that we
6007 // can tile with integers.
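// For example, a 4-byte struct operand is retyped below as a single i32.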
6008 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6009 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6010 switch (BitSize) {
6011 default: break;
6012 case 1:
6013 case 8:
6014 case 16:
6015 case 32:
6016 case 64:
6017 case 128:
6018 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6019 break;
6020 }
6021 }
6022
6023 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6024 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6025 ArgNo++;
6026 }
6027 }
6028
6029 // If we have multiple alternative constraints, select the best alternative.
6030 if (!ConstraintOperands.empty()) {
6031 if (maCount) {
6032 unsigned bestMAIndex = 0;
6033 int bestWeight = -1;
6034 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6035 int weight = -1;
6036 unsigned maIndex;
6037 // Compute the sums of the weights for each alternative, keeping track
6038 // of the best (highest weight) one so far.
6039 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6040 int weightSum = 0;
6041 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6042 cIndex != eIndex; ++cIndex) {
6043 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6044 if (OpInfo.Type == InlineAsm::isClobber)
6045 continue;
6046
6047 // If this is an output operand with a matching input operand,
6048 // look up the matching input. If their types mismatch, e.g. one
6049 // is an integer, the other is floating point, or their sizes are
6050 // different, flag it as maCantMatch.
6051 if (OpInfo.hasMatchingInput()) {
6052 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6053 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6054 if ((OpInfo.ConstraintVT.isInteger() !=
6055 Input.ConstraintVT.isInteger()) ||
6056 (OpInfo.ConstraintVT.getSizeInBits() !=
6057 Input.ConstraintVT.getSizeInBits())) {
6058 weightSum = -1; // Can't match.
6059 break;
6060 }
6061 }
6062 }
6063 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6064 if (weight == -1) {
6065 weightSum = -1;
6066 break;
6067 }
6068 weightSum += weight;
6069 }
6070 // Update best.
6071 if (weightSum > bestWeight) {
6072 bestWeight = weightSum;
6073 bestMAIndex = maIndex;
6074 }
6075 }
6076
6077 // Now select chosen alternative in each constraint.
6078 for (AsmOperandInfo &cInfo : ConstraintOperands)
6079 if (cInfo.Type != InlineAsm::isClobber)
6080 cInfo.selectAlternative(bestMAIndex);
6081 }
6082 }
6083
6084 // Check and hook up tied operands, choose constraint code to use.
6085 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6086 cIndex != eIndex; ++cIndex) {
6087 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6088
6089 // If this is an output operand with a matching input operand, look up the
6090 // matching input. If their types mismatch, e.g. one is an integer, the
6091 // other is floating point, or their sizes are different, flag it as an
6092 // error.
6093 if (OpInfo.hasMatchingInput()) {
6094 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6095
6096 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6097 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6098 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6099 OpInfo.ConstraintVT);
6100 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6101 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6102 Input.ConstraintVT);
6103 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6104 OpInfo.ConstraintVT.isFloatingPoint();
6105 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6106 Input.ConstraintVT.isFloatingPoint();
6107 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6108 (MatchRC.second != InputRC.second)) {
6109 report_fatal_error("Unsupported asm: input constraint"
6110 " with a matching output constraint of"
6111 " incompatible type!");
6112 }
6113 }
6114 }
6115 }
6116
6117 return ConstraintOperands;
6118}
6119
6120 /// Return a number indicating our preference for choosing a type of constraint
6121 /// over another, for the purpose of sorting them. Immediates are almost always
6122 /// preferable (when they can be emitted). A higher return value means a
6123/// stronger preference for one constraint type relative to another.
6124/// FIXME: We should prefer registers over memory but doing so may lead to
6125/// unrecoverable register exhaustion later.
6126/// https://github.com/llvm/llvm-project/issues/20571
6127 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6128 switch (CT) {
6129 case TargetLowering::C_Immediate:
6130 case TargetLowering::C_Other:
6131 return 4;
6132 case TargetLowering::C_Memory:
6133 case TargetLowering::C_Address:
6134 return 3;
6135 case TargetLowering::C_RegisterClass:
6136 return 2;
6137 case TargetLowering::C_Register:
6138 return 1;
6139 case TargetLowering::C_Unknown:
6140 return 0;
6141 }
6142 llvm_unreachable("Invalid constraint type");
6143}
6144
6145/// Examine constraint type and operand type and determine a weight value.
6146/// This object must already have been set up with the operand type
6147/// and the current alternative constraint selected.
6148 TargetLowering::ConstraintWeight
6149 TargetLowering::getMultipleConstraintMatchWeight(
6150 AsmOperandInfo &info, int maIndex) const {
6151 InlineAsm::ConstraintCodeVector *rCodes;
6152 if (maIndex >= (int)info.multipleAlternatives.size())
6153 rCodes = &info.Codes;
6154 else
6155 rCodes = &info.multipleAlternatives[maIndex].Codes;
6156 ConstraintWeight BestWeight = CW_Invalid;
6157
6158 // Loop over the options, keeping track of the most general one.
6159 for (const std::string &rCode : *rCodes) {
6160 ConstraintWeight weight =
6161 getSingleConstraintMatchWeight(info, rCode.c_str());
6162 if (weight > BestWeight)
6163 BestWeight = weight;
6164 }
6165
6166 return BestWeight;
6167}
6168
6169/// Examine constraint type and operand type and determine a weight value.
6170/// This object must already have been set up with the operand type
6171/// and the current alternative constraint selected.
6172 TargetLowering::ConstraintWeight
6173 TargetLowering::getSingleConstraintMatchWeight(
6174 AsmOperandInfo &info, const char *constraint) const {
6175 ConstraintWeight weight = CW_Invalid;
6176 Value *CallOperandVal = info.CallOperandVal;
6177 // If we don't have a value, we can't do a match,
6178 // but allow it at the lowest weight.
6179 if (!CallOperandVal)
6180 return CW_Default;
6181 // Look at the constraint type.
6182 switch (*constraint) {
6183 case 'i': // immediate integer.
6184 case 'n': // immediate integer with a known value.
6185 if (isa<ConstantInt>(CallOperandVal))
6186 weight = CW_Constant;
6187 break;
6188 case 's': // non-explicit integral immediate.
6189 if (isa<GlobalValue>(CallOperandVal))
6190 weight = CW_Constant;
6191 break;
6192 case 'E': // immediate float if host format.
6193 case 'F': // immediate float.
6194 if (isa<ConstantFP>(CallOperandVal))
6195 weight = CW_Constant;
6196 break;
6197 case '<': // memory operand with autodecrement.
6198 case '>': // memory operand with autoincrement.
6199 case 'm': // memory operand.
6200 case 'o': // offsettable memory operand
6201 case 'V': // non-offsettable memory operand
6202 weight = CW_Memory;
6203 break;
6204 case 'r': // general register.
6205 case 'g': // general register, memory operand or immediate integer.
6206 // note: Clang converts "g" to "imr".
6207 if (CallOperandVal->getType()->isIntegerTy())
6208 weight = CW_Register;
6209 break;
6210 case 'X': // any operand.
6211 default:
6212 weight = CW_Default;
6213 break;
6214 }
6215 return weight;
6216}
6217
6218/// If there are multiple different constraints that we could pick for this
6219/// operand (e.g. "imr") try to pick the 'best' one.
6220/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6221/// into seven classes:
6222/// Register -> one specific register
6223/// RegisterClass -> a group of regs
6224/// Memory -> memory
6225/// Address -> a symbolic memory reference
6226/// Immediate -> immediate values
6227/// Other -> magic values (such as "Flag Output Operands")
6228/// Unknown -> something we don't recognize yet and can't handle
6229/// Ideally, we would pick the most specific constraint possible: if we have
6230/// something that fits into a register, we would pick it. The problem here
6231/// is that if we have something that could either be in a register or in
6232 /// memory, use of the register could cause selection of *other*
6233 /// operands to fail: they might succeed only if we pick memory. Because of
6234 /// this, the heuristic we use is:
6235///
6236/// 1) If there is an 'other' constraint, and if the operand is valid for
6237/// that constraint, use it. This makes us take advantage of 'i'
6238/// constraints when available.
6239/// 2) Otherwise, pick the most general constraint present. This prefers
6240/// 'm' over 'r', for example.
6241///
6242 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6243 TargetLowering::AsmOperandInfo &OpInfo) const {
6244 ConstraintGroup Ret;
6245
6246 Ret.reserve(OpInfo.Codes.size());
6247 for (StringRef Code : OpInfo.Codes) {
6248 TargetLowering::ConstraintType CType = getConstraintType(Code);
6249
6250 // Indirect 'other' or 'immediate' constraints are not allowed.
6251 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6252 CType == TargetLowering::C_Register ||
6253 CType == TargetLowering::C_RegisterClass))
6254 continue;
6255
6256 // Things with matching constraints can only be registers, per gcc
6257 // documentation. This mainly affects "g" constraints.
6258 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6259 continue;
6260
6261 Ret.emplace_back(Code, CType);
6262 }
6263
6264 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
6265 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6266 });
6267
6268 return Ret;
6269}
6270
6271/// If we have an immediate, see if we can lower it. Return true if we can,
6272/// false otherwise.
6273 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6274 SDValue Op, SelectionDAG *DAG,
6275 const TargetLowering &TLI) {
6276
6277 assert((P.second == TargetLowering::C_Other ||
6278 P.second == TargetLowering::C_Immediate) &&
6279 "need immediate or other");
6280
6281 if (!Op.getNode())
6282 return false;
6283
6284 std::vector<SDValue> ResultOps;
6285 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6286 return !ResultOps.empty();
6287}
6288
6289/// Determines the constraint code and constraint type to use for the specific
6290/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6291 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6292 SDValue Op,
6293 SelectionDAG *DAG) const {
6294 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6295
6296 // Single-letter constraints ('r') are very common.
6297 if (OpInfo.Codes.size() == 1) {
6298 OpInfo.ConstraintCode = OpInfo.Codes[0];
6299 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6300 } else {
6301 ConstraintGroup G = getConstraintPreferences(OpInfo);
6302 if (G.empty())
6303 return;
6304
6305 unsigned BestIdx = 0;
6306 for (const unsigned E = G.size();
6307 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6308 G[BestIdx].second == TargetLowering::C_Immediate);
6309 ++BestIdx) {
6310 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6311 break;
6312 // If we're out of constraints, just pick the first one.
6313 if (BestIdx + 1 == E) {
6314 BestIdx = 0;
6315 break;
6316 }
6317 }
6318
6319 OpInfo.ConstraintCode = G[BestIdx].first;
6320 OpInfo.ConstraintType = G[BestIdx].second;
6321 }
6322
6323 // 'X' matches anything.
6324 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6325 // Constants are handled elsewhere. For Functions, the type here is the
6326 // type of the result, which is not what we want to look at; leave them
6327 // alone.
6328 Value *v = OpInfo.CallOperandVal;
6329 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6330 return;
6331 }
6332
6333 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6334 OpInfo.ConstraintCode = "i";
6335 return;
6336 }
6337
6338 // Otherwise, try to resolve it to something we know about by looking at
6339 // the actual operand type.
6340 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6341 OpInfo.ConstraintCode = Repl;
6342 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6343 }
6344 }
6345}
6346
6347/// Given an exact SDIV by a constant, create a multiplication
6348/// with the multiplicative inverse of the constant.
6349/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
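/// Illustrative example (editorial addition): for an exact `sdiv X, 6` on
/// i32, the divisor factors as 3 * 2^1, so this emits `ashr exact X, 1`
/// followed by a multiply with inv(3) mod 2^32 = 0xAAAAAAAB, since
/// 3 * 0xAAAAAAAB = 2^33 + 1 == 1 (mod 2^32).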
6350static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6351 const SDLoc &dl, SelectionDAG &DAG,
6352 SmallVectorImpl<SDNode *> &Created) {
6353 SDValue Op0 = N->getOperand(0);
6354 SDValue Op1 = N->getOperand(1);
6355 EVT VT = N->getValueType(0);
6356 EVT SVT = VT.getScalarType();
6357 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6358 EVT ShSVT = ShVT.getScalarType();
6359
6360 bool UseSRA = false;
6361 SmallVector<SDValue, 16> Shifts, Factors;
6362
6363 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6364 if (C->isZero())
6365 return false;
6366 APInt Divisor = C->getAPIntValue();
6367 unsigned Shift = Divisor.countr_zero();
6368 if (Shift) {
6369 Divisor.ashrInPlace(Shift);
6370 UseSRA = true;
6371 }
6372 APInt Factor = Divisor.multiplicativeInverse();
6373 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6374 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6375 return true;
6376 };
6377
6378 // Collect all magic values from the build vector.
6379 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6380 return SDValue();
6381
6382 SDValue Shift, Factor;
6383 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6384 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6385 Factor = DAG.getBuildVector(VT, dl, Factors);
6386 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6387 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6388 "Expected matchUnaryPredicate to return one element for scalable "
6389 "vectors");
6390 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6391 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6392 } else {
6393 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6394 Shift = Shifts[0];
6395 Factor = Factors[0];
6396 }
6397
6398 SDValue Res = Op0;
6399 if (UseSRA) {
6400 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6401 Created.push_back(Res.getNode());
6402 }
6403
6404 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6405}
6406
6407/// Given an exact UDIV by a constant, create a multiplication
6408/// with the multiplicative inverse of the constant.
6409/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
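/// Illustrative example (editorial addition): for an exact `udiv X, 10` on
/// i32, the divisor factors as 5 * 2^1, so this emits `lshr exact X, 1`
/// followed by a multiply with inv(5) mod 2^32 = 0xCCCCCCCD, since
/// 5 * 0xCCCCCCCD = 2^34 + 1 == 1 (mod 2^32).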
6410static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6411 const SDLoc &dl, SelectionDAG &DAG,
6412 SmallVectorImpl<SDNode *> &Created) {
6413 EVT VT = N->getValueType(0);
6414 EVT SVT = VT.getScalarType();
6415 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6416 EVT ShSVT = ShVT.getScalarType();
6417
6418 bool UseSRL = false;
6419 SmallVector<SDValue, 16> Shifts, Factors;
6420
6421 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6422 if (C->isZero())
6423 return false;
6424 APInt Divisor = C->getAPIntValue();
6425 unsigned Shift = Divisor.countr_zero();
6426 if (Shift) {
6427 Divisor.lshrInPlace(Shift);
6428 UseSRL = true;
6429 }
6430 // Calculate the multiplicative inverse modulo 2^BW.
6431 APInt Factor = Divisor.multiplicativeInverse();
6432 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6433 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6434 return true;
6435 };
6436
6437 SDValue Op1 = N->getOperand(1);
6438
6439 // Collect all magic values from the build vector.
6440 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6441 return SDValue();
6442
6443 SDValue Shift, Factor;
6444 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6445 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6446 Factor = DAG.getBuildVector(VT, dl, Factors);
6447 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6448 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6449 "Expected matchUnaryPredicate to return one element for scalable "
6450 "vectors");
6451 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6452 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6453 } else {
6454 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6455 Shift = Shifts[0];
6456 Factor = Factors[0];
6457 }
6458
6459 SDValue Res = N->getOperand(0);
6460 if (UseSRL) {
6461 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6462 Created.push_back(Res.getNode());
6463 }
6464
6465 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6466}
6467
6468SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6469 SelectionDAG &DAG,
6470 SmallVectorImpl<SDNode *> &Created) const {
6471 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6472 if (isIntDivCheap(N->getValueType(0), Attr))
6473 return SDValue(N, 0); // Lower SDIV as SDIV
6474 return SDValue();
6475}
6476
6477SDValue
6478TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6479 SelectionDAG &DAG,
6480 SmallVectorImpl<SDNode *> &Created) const {
6481 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6482 if (isIntDivCheap(N->getValueType(0), Attr))
6483 return SDValue(N, 0); // Lower SREM as SREM
6484 return SDValue();
6485}
6486
6487/// Build sdiv by power-of-2 with conditional move instructions
6488/// Ref: "Hacker's Delight" by Henry Warren 10-1
6489/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6490/// bgez x, label
6491/// add x, x, 2**k-1
6492/// label:
6493/// sra res, x, k
6494/// neg res, res (when the divisor is negative)
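/// Illustrative example (editorial addition): `sdiv x, 8` becomes
///   cmov = (x < 0) ? x + 7 : x
///   res  = sra cmov, 3
/// and for `sdiv x, -8` the same sequence is followed by `res = 0 - res`.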
6495SDValue TargetLowering::buildSDIVPow2WithCMov(
6496 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6497 SmallVectorImpl<SDNode *> &Created) const {
6498 unsigned Lg2 = Divisor.countr_zero();
6499 EVT VT = N->getValueType(0);
6500
6501 SDLoc DL(N);
6502 SDValue N0 = N->getOperand(0);
6503 SDValue Zero = DAG.getConstant(0, DL, VT);
6504 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6505 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6506
6507 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6508 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6509 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6510 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6511 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6512
6513 Created.push_back(Cmp.getNode());
6514 Created.push_back(Add.getNode());
6515 Created.push_back(CMov.getNode());
6516
6517 // Divide by pow2.
6518 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6519 DAG.getShiftAmountConstant(Lg2, VT, DL));
6520
6521 // If we're dividing by a positive value, we're done. Otherwise, we must
6522 // negate the result.
6523 if (Divisor.isNonNegative())
6524 return SRA;
6525
6526 Created.push_back(SRA.getNode());
6527 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6528}
6529
6530/// Given an ISD::SDIV node expressing a divide by constant,
6531/// return a DAG expression to select that will generate the same value by
6532/// multiplying by a magic number.
6533/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
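/// Illustrative example (editorial addition): for i32 `sdiv X, 7` the magic
/// constant is 0x92492493 with shift amount 2. The divisor is positive but
/// the magic value is negative, so the numerator is added after the high
/// multiply:
///   Q = mulhs(X, 0x92492493) + X; Q = sra(Q, 2); Q += srl(Q, 31);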
6534SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6535 bool IsAfterLegalization,
6536 bool IsAfterLegalTypes,
6537 SmallVectorImpl<SDNode *> &Created) const {
6538 SDLoc dl(N);
6539 EVT VT = N->getValueType(0);
6540 EVT SVT = VT.getScalarType();
6541 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6542 EVT ShSVT = ShVT.getScalarType();
6543 unsigned EltBits = VT.getScalarSizeInBits();
6544 EVT MulVT;
6545
6546 // Check to see if we can do this.
6547 // FIXME: We should be more aggressive here.
6548 if (!isTypeLegal(VT)) {
6549 // Limit this to simple scalars for now.
6550 if (VT.isVector() || !VT.isSimple())
6551 return SDValue();
6552
6553 // If this type will be promoted to a large enough type with a legal
6554 // multiply operation, we can go ahead and do this transform.
6555 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6556 return SDValue();
6557
6558 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6559 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6560 !isOperationLegal(ISD::MUL, MulVT))
6561 return SDValue();
6562 }
6563
6564 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6565 if (N->getFlags().hasExact())
6566 return BuildExactSDIV(*this, N, dl, DAG, Created);
6567
6568 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6569
6570 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6571 if (C->isZero())
6572 return false;
6573
6574 const APInt &Divisor = C->getAPIntValue();
6575 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6576 int NumeratorFactor = 0;
6577 int ShiftMask = -1;
6578
6579 if (Divisor.isOne() || Divisor.isAllOnes()) {
6580 // If d is +1/-1, we just multiply the numerator by +1/-1.
6581 NumeratorFactor = Divisor.getSExtValue();
6582 magics.Magic = 0;
6583 magics.ShiftAmount = 0;
6584 ShiftMask = 0;
6585 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6586 // If d > 0 and m < 0, add the numerator.
6587 NumeratorFactor = 1;
6588 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6589 // If d < 0 and m > 0, subtract the numerator.
6590 NumeratorFactor = -1;
6591 }
6592
6593 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6594 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6595 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6596 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6597 return true;
6598 };
6599
6600 SDValue N0 = N->getOperand(0);
6601 SDValue N1 = N->getOperand(1);
6602
6603 // Collect the shifts / magic values from each element.
6604 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6605 return SDValue();
6606
6607 SDValue MagicFactor, Factor, Shift, ShiftMask;
6608 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6609 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6610 Factor = DAG.getBuildVector(VT, dl, Factors);
6611 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6612 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6613 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6614 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6615 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6616 "Expected matchUnaryPredicate to return one element for scalable "
6617 "vectors");
6618 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6619 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6620 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6621 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6622 } else {
6623 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6624 MagicFactor = MagicFactors[0];
6625 Factor = Factors[0];
6626 Shift = Shifts[0];
6627 ShiftMask = ShiftMasks[0];
6628 }
6629
6630 // Multiply the numerator (operand 0) by the magic value.
6631 // FIXME: We should support doing a MUL in a wider type.
6632 auto GetMULHS = [&](SDValue X, SDValue Y) {
6633 // If the type isn't legal, use a wider mul of the type calculated
6634 // earlier.
6635 if (!isTypeLegal(VT)) {
6636 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6637 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6638 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6639 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6640 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6641 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6642 }
6643
6644 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6645 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6646 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6647 SDValue LoHi =
6648 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6649 return SDValue(LoHi.getNode(), 1);
6650 }
6651 // If a type twice as wide is legal, widen and use a mul plus a shift.
6652 unsigned Size = VT.getScalarSizeInBits();
6653 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6654 if (VT.isVector())
6655 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6656 VT.getVectorElementCount());
6657 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6658 // custom lowered. This is very expensive so avoid it at all costs for
6659 // constant divisors.
6660 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6661 isOperationCustom(ISD::SDIVREM, VT.getScalarType())) ||
6662 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6663 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6664 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6665 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6666 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6667 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6668 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6669 }
6670 return SDValue();
6671 };
6672
6673 SDValue Q = GetMULHS(N0, MagicFactor);
6674 if (!Q)
6675 return SDValue();
6676
6677 Created.push_back(Q.getNode());
6678
6679 // (Optionally) Add/subtract the numerator using Factor.
6680 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6681 Created.push_back(Factor.getNode());
6682 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6683 Created.push_back(Q.getNode());
6684
6685 // Shift right algebraic by shift value.
6686 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6687 Created.push_back(Q.getNode());
6688
6689 // Extract the sign bit, mask it and add it to the quotient.
6690 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6691 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6692 Created.push_back(T.getNode());
6693 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6694 Created.push_back(T.getNode());
6695 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6696}
6697
6698/// Given an ISD::UDIV node expressing a divide by constant,
6699/// return a DAG expression to select that will generate the same value by
6700/// multiplying by a magic number.
6701/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
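/// Illustrative example (editorial addition): for i32 `udiv X, 7` the magic
/// constant is 0x24924925 with the NPQ (add) fixup and post-shift 2:
///   Q   = mulhu(X, 0x24924925);
///   NPQ = srl(X - Q, 1);
///   Q   = srl(NPQ + Q, 2);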
6702SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6703 bool IsAfterLegalization,
6704 bool IsAfterLegalTypes,
6705 SmallVectorImpl<SDNode *> &Created) const {
6706 SDLoc dl(N);
6707 EVT VT = N->getValueType(0);
6708 EVT SVT = VT.getScalarType();
6709 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6710 EVT ShSVT = ShVT.getScalarType();
6711 unsigned EltBits = VT.getScalarSizeInBits();
6712 EVT MulVT;
6713
6714 // Check to see if we can do this.
6715 // FIXME: We should be more aggressive here.
6716 if (!isTypeLegal(VT)) {
6717 // Limit this to simple scalars for now.
6718 if (VT.isVector() || !VT.isSimple())
6719 return SDValue();
6720
6721 // If this type will be promoted to a large enough type with a legal
6722 // multiply operation, we can go ahead and do this transform.
6723 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6724 return SDValue();
6725
6726 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6727 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6728 !isOperationLegal(ISD::MUL, MulVT))
6729 return SDValue();
6730 }
6731
6732 // If the udiv has an 'exact' bit we can use a simpler lowering.
6733 if (N->getFlags().hasExact())
6734 return BuildExactUDIV(*this, N, dl, DAG, Created);
6735
6736 SDValue N0 = N->getOperand(0);
6737 SDValue N1 = N->getOperand(1);
6738
6739 // Try to use leading zeros of the dividend to reduce the multiplier and
6740 // avoid expensive fixups.
6741 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6742
6743 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6744 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6745
6746 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6747 if (C->isZero())
6748 return false;
6749 const APInt& Divisor = C->getAPIntValue();
6750
6751 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6752
6753 // Magic algorithm doesn't work for division by 1. We need to emit a select
6754 // at the end.
6755 if (Divisor.isOne()) {
6756 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6757 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6758 } else {
6759 UnsignedDivisionByConstantInfo magics =
6760 UnsignedDivisionByConstantInfo::get(
6761 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6762
6763 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6764
6765 assert(magics.PreShift < Divisor.getBitWidth() &&
6766 "We shouldn't generate an undefined shift!");
6767 assert(magics.PostShift < Divisor.getBitWidth() &&
6768 "We shouldn't generate an undefined shift!");
6769 assert((!magics.IsAdd || magics.PreShift == 0) &&
6770 "Unexpected pre-shift");
6771 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6772 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6773 NPQFactor = DAG.getConstant(
6774 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6775 : APInt::getZero(EltBits),
6776 dl, SVT);
6777 UseNPQ |= magics.IsAdd;
6778 UsePreShift |= magics.PreShift != 0;
6779 UsePostShift |= magics.PostShift != 0;
6780 }
6781
6782 PreShifts.push_back(PreShift);
6783 MagicFactors.push_back(MagicFactor);
6784 NPQFactors.push_back(NPQFactor);
6785 PostShifts.push_back(PostShift);
6786 return true;
6787 };
6788
6789 // Collect the shifts/magic values from each element.
6790 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6791 return SDValue();
6792
6793 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6794 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6795 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6796 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6797 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6798 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6799 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6800 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6801 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6802 "Expected matchUnaryPredicate to return one for scalable vectors");
6803 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6804 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6805 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6806 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6807 } else {
6808 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6809 PreShift = PreShifts[0];
6810 MagicFactor = MagicFactors[0];
6811 PostShift = PostShifts[0];
6812 }
6813
6814 SDValue Q = N0;
6815 if (UsePreShift) {
6816 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6817 Created.push_back(Q.getNode());
6818 }
6819
6820 // FIXME: We should support doing a MUL in a wider type.
6821 auto GetMULHU = [&](SDValue X, SDValue Y) {
6822 // If the type isn't legal, use a wider mul of the type calculated
6823 // earlier.
6824 if (!isTypeLegal(VT)) {
6825 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6826 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6827 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6828 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6829 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6830 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6831 }
6832
6833 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6834 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6835 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6836 SDValue LoHi =
6837 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6838 return SDValue(LoHi.getNode(), 1);
6839 }
6840 // If a type twice as wide is legal, widen and use a mul plus a shift.
6841 unsigned Size = VT.getScalarSizeInBits();
6842 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6843 if (VT.isVector())
6844 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6845 VT.getVectorElementCount());
6846 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6847 // custom lowered. This is very expensive so avoid it at all costs for
6848 // constant divisors.
6849 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6850 isOperationCustom(ISD::UDIVREM, VT.getScalarType())) ||
6851 isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6852 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6853 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6854 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6855 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6856 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6857 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6858 }
6859 return SDValue(); // No mulhu or equivalent
6860 };
6861
6862 // Multiply the numerator (operand 0) by the magic value.
6863 Q = GetMULHU(Q, MagicFactor);
6864 if (!Q)
6865 return SDValue();
6866
6867 Created.push_back(Q.getNode());
6868
6869 if (UseNPQ) {
6870 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6871 Created.push_back(NPQ.getNode());
6872
6873 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6874 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6875 if (VT.isVector())
6876 NPQ = GetMULHU(NPQ, NPQFactor);
6877 else
6878 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6879
6880 Created.push_back(NPQ.getNode());
6881
6882 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6883 Created.push_back(Q.getNode());
6884 }
6885
6886 if (UsePostShift) {
6887 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6888 Created.push_back(Q.getNode());
6889 }
6890
6891 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6892
6893 SDValue One = DAG.getConstant(1, dl, VT);
6894 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6895 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6896}
6897
6898 /// If all values in Values that *don't* match the predicate are the same 'splat'
6899/// value, then replace all values with that splat value.
6900/// Else, if AlternativeReplacement was provided, then replace all values that
6901/// do match predicate with AlternativeReplacement value.
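/// Illustrative example (editorial addition): with Values = [7, 0, 7, 7] and
/// a predicate matching zero constants, every non-matching value is the same
/// '7', so the zeros are replaced and the result is [7, 7, 7, 7]. With
/// Values = [7, 0, 5, 7] there is no common splat, so the zeros are replaced
/// by AlternativeReplacement if one was provided.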
6902static void
6903turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6904 std::function<bool(SDValue)> Predicate,
6905 SDValue AlternativeReplacement = SDValue()) {
6906 SDValue Replacement;
6907 // Is there a value for which the Predicate does *NOT* match? What is it?
6908 auto SplatValue = llvm::find_if_not(Values, Predicate);
6909 if (SplatValue != Values.end()) {
6910 // Does Values consist only of SplatValue's and values matching Predicate?
6911 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6912 return Value == *SplatValue || Predicate(Value);
6913 })) // Then we shall replace values matching predicate with SplatValue.
6914 Replacement = *SplatValue;
6915 }
6916 if (!Replacement) {
6917 // Oops, we did not find the "baseline" splat value.
6918 if (!AlternativeReplacement)
6919 return; // Nothing to do.
6920 // Let's replace with provided value then.
6921 Replacement = AlternativeReplacement;
6922 }
6923 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6924}
6925
6926/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6927/// where the divisor is constant and the comparison target is zero,
6928/// return a DAG expression that will generate the same comparison result
6929/// using only multiplications, additions and shifts/rotations.
6930/// Ref: "Hacker's Delight" 10-17.
6931SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6932 SDValue CompTargetNode,
6933 ISD::CondCode Cond,
6934 DAGCombinerInfo &DCI,
6935 const SDLoc &DL) const {
6936 SmallVector<SDNode *, 16> Built;
6937 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6938 DCI, DL, Built)) {
6939 for (SDNode *N : Built)
6940 DCI.AddToWorklist(N);
6941 return Folded;
6942 }
6943
6944 return SDValue();
6945}
6946
6947SDValue
6948TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6949 SDValue CompTargetNode, ISD::CondCode Cond,
6950 DAGCombinerInfo &DCI, const SDLoc &DL,
6951 SmallVectorImpl<SDNode *> &Created) const {
6952 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6953 // - D must be constant, with D = D0 * 2^K where D0 is odd
6954 // - P is the multiplicative inverse of D0 modulo 2^W
6955 // - Q = floor(((2^W) - 1) / D)
6956 // where W is the width of the common type of N and D.
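  // Illustrative example (editorial addition): for i32 `(x u% 6) == 0`,
  // D = 6 = 3 * 2^1 gives K = 1, P = inv(3) mod 2^32 = 0xAAAAAAAB, and
  // Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so the fold produces
  //   setule (rotr (mul x, 0xAAAAAAAB), 1), 0x2AAAAAAA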
6957 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6958 "Only applicable for (in)equality comparisons.");
6959
6960 SelectionDAG &DAG = DCI.DAG;
6961
6962 EVT VT = REMNode.getValueType();
6963 EVT SVT = VT.getScalarType();
6964 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6965 EVT ShSVT = ShVT.getScalarType();
6966
6967 // If MUL is unavailable, we cannot proceed in any case.
6968 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6969 return SDValue();
6970
6971 bool ComparingWithAllZeros = true;
6972 bool AllComparisonsWithNonZerosAreTautological = true;
6973 bool HadTautologicalLanes = false;
6974 bool AllLanesAreTautological = true;
6975 bool HadEvenDivisor = false;
6976 bool AllDivisorsArePowerOfTwo = true;
6977 bool HadTautologicalInvertedLanes = false;
6978 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6979
6980 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6981 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6982 if (CDiv->isZero())
6983 return false;
6984
6985 const APInt &D = CDiv->getAPIntValue();
6986 const APInt &Cmp = CCmp->getAPIntValue();
6987
6988 ComparingWithAllZeros &= Cmp.isZero();
6989
6990 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6991 // if C2 is not less than C1, the comparison is always false.
6992 // But we will only be able to produce the comparison that will give the
6993 // opposite tautological answer. So this lane would need to be fixed up.
6994 bool TautologicalInvertedLane = D.ule(Cmp);
6995 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6996
6997 // If all lanes are tautological (either all divisors are ones, or divisor
6998 // is not greater than the constant we are comparing with),
6999 // we will prefer to avoid the fold.
7000 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7001 HadTautologicalLanes |= TautologicalLane;
7002 AllLanesAreTautological &= TautologicalLane;
7003
7004 // If we are comparing with non-zero, we'll need to subtract said
7005 // comparison value from the LHS. But there is no point in doing that if
7006 // every lane where we are comparing with non-zero is tautological.
7007 if (!Cmp.isZero())
7008 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7009
7010 // Decompose D into D0 * 2^K
7011 unsigned K = D.countr_zero();
7012 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7013 APInt D0 = D.lshr(K);
7014
7015 // D is even if it has trailing zeros.
7016 HadEvenDivisor |= (K != 0);
7017 // D is a power-of-two if D0 is one.
7018 // If all divisors are power-of-two, we will prefer to avoid the fold.
7019 AllDivisorsArePowerOfTwo &= D0.isOne();
7020
7021 // P = inv(D0, 2^W)
7022 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7023 unsigned W = D.getBitWidth();
7024 APInt P = D0.multiplicativeInverse();
7025 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7026
7027 // Q = floor((2^W - 1) u/ D)
7028 // R = ((2^W - 1) u% D)
7029 APInt Q, R;
7030 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7031
7032 // If we are comparing with zero, then that comparison constant is okay,
7033 // else it may need to be one less than that.
7034 if (Cmp.ugt(R))
7035 Q -= 1;
7036
7038 "We are expecting that K is always less than all-ones for ShSVT");
7039
7040 // If the lane is tautological the result can be constant-folded.
7041 if (TautologicalLane) {
7042 // Set the P and K amounts to bogus values so we can try to splat them.
7043 P = 0;
7044 K = -1;
7045 // And ensure that comparison constant is tautological,
7046 // it will always compare true/false.
7047 Q = -1;
7048 }
7049
7050 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7051 KAmts.push_back(
7052 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7053 /*implicitTrunc=*/true),
7054 DL, ShSVT));
7055 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7056 return true;
7057 };
7058
7059 SDValue N = REMNode.getOperand(0);
7060 SDValue D = REMNode.getOperand(1);
7061
7062 // Collect the values from each element.
7063 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7064 return SDValue();
7065
7066 // If all lanes are tautological, the result can be constant-folded.
7067 if (AllLanesAreTautological)
7068 return SDValue();
7069
7070 // If this is a urem by a power-of-two, avoid the fold since it can be
7071 // best implemented as a bit test.
7072 if (AllDivisorsArePowerOfTwo)
7073 return SDValue();
7074
7075 SDValue PVal, KVal, QVal;
7076 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7077 if (HadTautologicalLanes) {
7078 // Try to turn PAmts into a splat, since we don't care about the values
7079 // that are currently '0'. If we can't, just keep '0's.
7080 turnVectorIntoSplatVector(PAmts, isNullConstant);
7081 // Try to turn KAmts into a splat, since we don't care about the values
7082 // that are currently '-1'. If we can't, change them to '0's.
7083 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7084 DAG.getConstant(0, DL, ShSVT));
7085 }
7086
7087 PVal = DAG.getBuildVector(VT, DL, PAmts);
7088 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7089 QVal = DAG.getBuildVector(VT, DL, QAmts);
7090 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7091 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7092 "Expected matchBinaryPredicate to return one element for "
7093 "SPLAT_VECTORs");
7094 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7095 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7096 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7097 } else {
7098 PVal = PAmts[0];
7099 KVal = KAmts[0];
7100 QVal = QAmts[0];
7101 }
7102
7103 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7104 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7105 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7106 assert(CompTargetNode.getValueType() == N.getValueType() &&
7107 "Expecting that the types on LHS and RHS of comparisons match.");
7108 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7109 }
7110
7111 // (mul N, P)
7112 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7113 Created.push_back(Op0.getNode());
7114
7115 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7116 // divisors as a performance improvement, since rotating by 0 is a no-op.
7117 if (HadEvenDivisor) {
7118 // We need ROTR to do this.
7119 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7120 return SDValue();
7121 // UREM: (rotr (mul N, P), K)
7122 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7123 Created.push_back(Op0.getNode());
7124 }
7125
7126 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7127 SDValue NewCC =
7128 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7129 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7130 if (!HadTautologicalInvertedLanes)
7131 return NewCC;
7132
7133 // If any lanes previously compared always-false, the NewCC will give
7134 // always-true result for them, so we need to fixup those lanes.
7135 // Or the other way around for inequality predicate.
7136 assert(VT.isVector() && "Can/should only get here for vectors.");
7137 Created.push_back(NewCC.getNode());
7138
7139 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7140 // if C2 is not less than C1, the comparison is always false.
7141 // But we have produced the comparison that will give the
7142 // opposite tautological answer. So these lanes would need to be fixed up.
7143 SDValue TautologicalInvertedChannels =
7144 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7145 Created.push_back(TautologicalInvertedChannels.getNode());
7146
7147 // NOTE: we avoid letting illegal types through even if we're before legalize
7148 // ops – legalization has a hard time producing good code for this.
7149 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7150 // If we have a vector select, let's replace the comparison results in the
7151 // affected lanes with the correct tautological result.
7152 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7153 DL, SETCCVT, SETCCVT);
7154 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7155 Replacement, NewCC);
7156 }
7157
7158 // Else, we can just invert the comparison result in the appropriate lanes.
7159 //
7160 // NOTE: see the note above VSELECT above.
7161 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7162 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7163 TautologicalInvertedChannels);
7164
7165 return SDValue(); // Don't know how to lower.
7166}
7167
7168/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7169/// where the divisor is constant and the comparison target is zero,
7170/// return a DAG expression that will generate the same comparison result
7171/// using only multiplications, additions and shifts/rotations.
7172/// Ref: "Hacker's Delight" 10-17.
7173SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7174 SDValue CompTargetNode,
7175 ISD::CondCode Cond,
7176 DAGCombinerInfo &DCI,
7177 const SDLoc &DL) const {
7178 SmallVector<SDNode *, 16> Built;
7179 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7180 DCI, DL, Built)) {
7181 assert(Built.size() <= 7 && "Max size prediction failed.");
7182 for (SDNode *N : Built)
7183 DCI.AddToWorklist(N);
7184 return Folded;
7185 }
7186
7187 return SDValue();
7188}
7189
7190SDValue
7191TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7192 SDValue CompTargetNode, ISD::CondCode Cond,
7193 DAGCombinerInfo &DCI, const SDLoc &DL,
7194 SmallVectorImpl<SDNode *> &Created) const {
7195 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7196 // Fold:
7197 // (seteq/ne (srem N, D), 0)
7198 // To:
7199 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7200 //
7201 // - D must be constant, with D = D0 * 2^K where D0 is odd
7202 // - P is the multiplicative inverse of D0 modulo 2^W
7203 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7204 // - Q = floor((2 * A) / (2^K))
7205 // where W is the width of the common type of N and D.
7206 //
7207 // When D is a power of two (and thus D0 is 1), the normal
7208 // formula for A and Q don't apply, because the derivation
7209 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7210 // does not apply. This specifically fails when N = INT_MIN.
7211 //
7212 // Instead, for power-of-two D, we use:
7213 // - A = 2^(W-1)
7214 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7215 // - Q = 2^(W-K) - 1
7216 // |-> Test that the top K bits are zero after rotation
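  // Illustrative example (editorial addition): for i32 `(x s% 6) == 0`,
  // D0 = 3 and K = 1 give P = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) with the
  // low bit cleared = 0x2AAAAAAA, and Q = (2 * A) >> 1 = 0x2AAAAAAA, so the
  // fold produces
  //   setule (rotr (add (mul x, 0xAAAAAAAB), 0x2AAAAAAA), 1), 0x2AAAAAAA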
7217 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7218 "Only applicable for (in)equality comparisons.");
7219
7220 SelectionDAG &DAG = DCI.DAG;
7221
7222 EVT VT = REMNode.getValueType();
7223 EVT SVT = VT.getScalarType();
7224 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7225 EVT ShSVT = ShVT.getScalarType();
7226
7227 // If we are after ops legalization, and MUL is unavailable, we can not
7228 // proceed.
7229 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7230 return SDValue();
7231
7232 // TODO: Could support comparing with non-zero too.
7233 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7234 if (!CompTarget || !CompTarget->isZero())
7235 return SDValue();
7236
7237 bool HadIntMinDivisor = false;
7238 bool HadOneDivisor = false;
7239 bool AllDivisorsAreOnes = true;
7240 bool HadEvenDivisor = false;
7241 bool NeedToApplyOffset = false;
7242 bool AllDivisorsArePowerOfTwo = true;
7243 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7244
7245 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7246 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7247 if (C->isZero())
7248 return false;
7249
7250 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7251
7252 // WARNING: this fold is only valid for positive divisors!
7253 APInt D = C->getAPIntValue();
7254 if (D.isNegative())
7255 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7256
7257 HadIntMinDivisor |= D.isMinSignedValue();
7258
7259 // If all divisors are ones, we will prefer to avoid the fold.
7260 HadOneDivisor |= D.isOne();
7261 AllDivisorsAreOnes &= D.isOne();
7262
7263 // Decompose D into D0 * 2^K
7264 unsigned K = D.countr_zero();
7265 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7266 APInt D0 = D.lshr(K);
7267
7268 if (!D.isMinSignedValue()) {
7269 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7270 // we don't care about this lane in this fold, we'll special-handle it.
7271 HadEvenDivisor |= (K != 0);
7272 }
7273
7274 // D is a power-of-two if D0 is one. This includes INT_MIN.
7275 // If all divisors are power-of-two, we will prefer to avoid the fold.
7276 AllDivisorsArePowerOfTwo &= D0.isOne();
7277
7278 // P = inv(D0, 2^W)
7279 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7280 unsigned W = D.getBitWidth();
7281 APInt P = D0.multiplicativeInverse();
7282 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7283
7284 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7285 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7286 A.clearLowBits(K);
7287
7288 if (!D.isMinSignedValue()) {
7289 // If divisor INT_MIN, then we don't care about this lane in this fold,
7290 // we'll special-handle it.
7291 NeedToApplyOffset |= A != 0;
7292 }
7293
7294 // Q = floor((2 * A) / (2^K))
7295 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7296
7298 "We are expecting that A is always less than all-ones for SVT");
7300 "We are expecting that K is always less than all-ones for ShSVT");
7301
7302 // If D was a power of two, apply the alternate constant derivation.
7303 if (D0.isOne()) {
7304 // A = 2^(W-1)
7305 A = APInt::getSignedMinValue(W);
7306 // - Q = 2^(W-K) - 1
7307 Q = APInt::getAllOnes(W - K).zext(W);
7308 }
7309
7310 // If the divisor is 1 the result can be constant-folded. Likewise, we
7311 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7312 if (D.isOne()) {
7313 // Set P, A and K to bogus values so we can try to splat them.
7314 P = 0;
7315 A = -1;
7316 K = -1;
7317
7318 // x ?% 1 == 0 <--> true <--> x u<= -1
7319 Q = -1;
7320 }
7321
7322 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7323 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7324 KAmts.push_back(
7325 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7326 /*implicitTrunc=*/true),
7327 DL, ShSVT));
7328 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7329 return true;
7330 };
7331
7332 SDValue N = REMNode.getOperand(0);
7333 SDValue D = REMNode.getOperand(1);
7334
7335 // Collect the values from each element.
7336 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7337 return SDValue();
7338
7339 // If this is a srem by one, avoid the fold since it can be constant-folded.
7340 if (AllDivisorsAreOnes)
7341 return SDValue();
7342
7343 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7344 // since it can be best implemented as a bit test.
7345 if (AllDivisorsArePowerOfTwo)
7346 return SDValue();
7347
7348 SDValue PVal, AVal, KVal, QVal;
7349 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7350 if (HadOneDivisor) {
7351 // Try to turn PAmts into a splat, since we don't care about the values
7352 // that are currently '0'. If we can't, just keep '0's.
7353 turnVectorIntoSplatVector(PAmts, isNullConstant);
7354 // Try to turn AAmts into a splat, since we don't care about the
7355 // values that are currently '-1'. If we can't, change them to '0's.
7356 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7357 DAG.getConstant(0, DL, SVT));
7358 // Try to turn KAmts into a splat, since we don't care about the values
7359 // that are currently '-1'. If we can't, change them to '0's.
7360 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7361 DAG.getConstant(0, DL, ShSVT));
7362 }
7363
7364 PVal = DAG.getBuildVector(VT, DL, PAmts);
7365 AVal = DAG.getBuildVector(VT, DL, AAmts);
7366 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7367 QVal = DAG.getBuildVector(VT, DL, QAmts);
7368 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7369 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7370 QAmts.size() == 1 &&
7371 "Expected matchUnaryPredicate to return one element for scalable "
7372 "vectors");
7373 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7374 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7375 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7376 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7377 } else {
7378 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7379 PVal = PAmts[0];
7380 AVal = AAmts[0];
7381 KVal = KAmts[0];
7382 QVal = QAmts[0];
7383 }
7384
7385 // (mul N, P)
7386 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7387 Created.push_back(Op0.getNode());
7388
7389 if (NeedToApplyOffset) {
7390 // We need ADD to do this.
7391 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7392 return SDValue();
7393
7394 // (add (mul N, P), A)
7395 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7396 Created.push_back(Op0.getNode());
7397 }
7398
7399 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7400 // divisors as a performance improvement, since rotating by 0 is a no-op.
7401 if (HadEvenDivisor) {
7402 // We need ROTR to do this.
7403 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7404 return SDValue();
7405 // SREM: (rotr (add (mul N, P), A), K)
7406 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7407 Created.push_back(Op0.getNode());
7408 }
7409
7410 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7411 SDValue Fold =
7412 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7413 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7414
7415 // If we didn't have lanes with INT_MIN divisor, then we're done.
7416 if (!HadIntMinDivisor)
7417 return Fold;
7418
7419 // That fold is only valid for positive divisors. Which effectively means,
7420 // it is invalid for INT_MIN divisors. So if we have such a lane,
7421 // we must fix-up results for said lanes.
7422 assert(VT.isVector() && "Can/should only get here for vectors.");
7423
7424 // NOTE: we avoid letting illegal types through even if we're before legalize
7425 // ops – legalization has a hard time producing good code for the code that
7426 // follows.
7427 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7428 !isOperationLegalOrCustom(ISD::AND, VT) ||
7429 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7430 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7431 return SDValue();
7432
7433 Created.push_back(Fold.getNode());
7434
7435 SDValue IntMin = DAG.getConstant(
7436 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7437 SDValue IntMax = DAG.getConstant(
7438 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7439 SDValue Zero =
7440 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7441
7442 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7443 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7444 Created.push_back(DivisorIsIntMin.getNode());
7445
7446 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7447 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7448 Created.push_back(Masked.getNode());
7449 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7450 Created.push_back(MaskedIsZero.getNode());
7451
7452 // To produce final result we need to blend 2 vectors: 'SetCC' and
7453 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7454 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7455 // constant-folded, select can get lowered to a shuffle with constant mask.
7456 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7457 MaskedIsZero, Fold);
7458
7459 return Blended;
7460}
7461
7462SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7463 const DenormalMode &Mode) const {
7464 SDLoc DL(Op);
7465 EVT VT = Op.getValueType();
7466 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7467 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7468
7469 // This is specifically a check for the handling of denormal inputs, not the
7470 // result.
7471 if (Mode.Input == DenormalMode::PreserveSign ||
7472 Mode.Input == DenormalMode::PositiveZero) {
7473 // Test = X == 0.0
7474 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7475 }
7476
7477 // Test with denormal inputs to avoid a wrong estimate.
7478 //
7479 // Test = fabs(X) < SmallestNormal
7480 const fltSemantics &FltSem = VT.getFltSemantics();
7481 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7482 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7483 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7484 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7485}
7486
7487SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7488 bool LegalOps, bool OptForSize,
7489 NegatibleCost &Cost,
7490 unsigned Depth) const {
7491 // fneg is removable even if it has multiple uses.
7492 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7493 Cost = NegatibleCost::Cheaper;
7494 return Op.getOperand(0);
7495 }
7496
7497 // Don't recurse exponentially.
7498 if (Depth > SelectionDAG::MaxRecursionDepth)
7499 return SDValue();
7500
7501 // Pre-increment recursion depth for use in recursive calls.
7502 ++Depth;
7503 const SDNodeFlags Flags = Op->getFlags();
7504 const TargetOptions &Options = DAG.getTarget().Options;
7505 EVT VT = Op.getValueType();
7506 unsigned Opcode = Op.getOpcode();
7507
7508 // Don't allow anything with multiple uses unless we know it is free.
7509 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7510 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7511 isFPExtFree(VT, Op.getOperand(0).getValueType());
7512 if (!IsFreeExtend)
7513 return SDValue();
7514 }
7515
7516 auto RemoveDeadNode = [&](SDValue N) {
7517 if (N && N.getNode()->use_empty())
7518 DAG.RemoveDeadNode(N.getNode());
7519 };
7520
7521 SDLoc DL(Op);
7522
7523 // Because getNegatedExpression can delete nodes, we need a handle to keep
7524 // temporary nodes alive in case the recursion manages to create an identical
7525 // node.
7526 std::list<HandleSDNode> Handles;
7527
7528 switch (Opcode) {
7529 case ISD::ConstantFP: {
7530 // Don't invert constant FP values after legalization unless the target says
7531 // the negated constant is legal.
7532 bool IsOpLegal =
7533 isOperationLegal(ISD::ConstantFP, VT) ||
7534 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7535 OptForSize);
7536
7537 if (LegalOps && !IsOpLegal)
7538 break;
7539
7540 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7541 V.changeSign();
7542 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7543
7544 // If we already have the use of the negated floating constant, it is free
7545 // to negate it even if it has multiple uses.
7546 if (!Op.hasOneUse() && CFP.use_empty())
7547 break;
7548 Cost = NegatibleCost::Neutral;
7549 return CFP;
7550 }
7551 case ISD::BUILD_VECTOR: {
7552 // Only permit BUILD_VECTOR of constants.
7553 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7554 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7555 }))
7556 break;
7557
7558 bool IsOpLegal =
7559 (isOperationLegal(ISD::ConstantFP, VT) &&
7560 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7561 llvm::all_of(Op->op_values(), [&](SDValue N) {
7562 return N.isUndef() ||
7563 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7564 OptForSize);
7565 });
7566
7567 if (LegalOps && !IsOpLegal)
7568 break;
7569
7570 SmallVector<SDValue, 4> Ops;
7571 for (SDValue C : Op->op_values()) {
7572 if (C.isUndef()) {
7573 Ops.push_back(C);
7574 continue;
7575 }
7576 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7577 V.changeSign();
7578 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7579 }
7580 Cost = NegatibleCost::Neutral;
7581 return DAG.getBuildVector(VT, DL, Ops);
7582 }
7583 case ISD::FADD: {
7584 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7585 break;
7586
7587 // After operation legalization, it might not be legal to create new FSUBs.
7588 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7589 break;
7590 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7591
7592 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7593 NegatibleCost CostX = NegatibleCost::Expensive;
7594 SDValue NegX =
7595 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7596 // Prevent this node from being deleted by the next call.
7597 if (NegX)
7598 Handles.emplace_back(NegX);
7599
7600 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7601 NegatibleCost CostY = NegatibleCost::Expensive;
7602 SDValue NegY =
7603 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7604
7605 // We're done with the handles.
7606 Handles.clear();
7607
7608 // Negate X if its cost is less than or equal to the cost of negating Y.
7609 if (NegX && (CostX <= CostY)) {
7610 Cost = CostX;
7611 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7612 if (NegY != N)
7613 RemoveDeadNode(NegY);
7614 return N;
7615 }
7616
7617 // Negate Y if it is not expensive.
7618 if (NegY) {
7619 Cost = CostY;
7620 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7621 if (NegX != N)
7622 RemoveDeadNode(NegX);
7623 return N;
7624 }
7625 break;
7626 }
7627 case ISD::FSUB: {
7628 // We can't turn -(A-B) into B-A when we honor signed zeros.
7629 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7630 break;
7631
7632 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7633 // fold (fneg (fsub 0, Y)) -> Y
7634 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7635 if (C->isZero()) {
7636 Cost = NegatibleCost::Cheaper;
7637 return Y;
7638 }
7639
7640 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7641 Cost = NegatibleCost::Neutral;
7642 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7643 }
7644 case ISD::FMUL:
7645 case ISD::FDIV: {
7646 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7647
7648 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7649 NegatibleCost CostX = NegatibleCost::Expensive;
7650 SDValue NegX =
7651 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7652 // Prevent this node from being deleted by the next call.
7653 if (NegX)
7654 Handles.emplace_back(NegX);
7655
7656 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7657 NegatibleCost CostY = NegatibleCost::Expensive;
7658 SDValue NegY =
7659 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7660
7661 // We're done with the handles.
7662 Handles.clear();
7663
7664 // Negate X if its cost is less than or equal to the cost of negating Y.
7665 if (NegX && (CostX <= CostY)) {
7666 Cost = CostX;
7667 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7668 if (NegY != N)
7669 RemoveDeadNode(NegY);
7670 return N;
7671 }
7672
7673 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7674 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7675 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7676 break;
7677
7678 // Negate Y if it is not expensive.
7679 if (NegY) {
7680 Cost = CostY;
7681 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7682 if (NegX != N)
7683 RemoveDeadNode(NegX);
7684 return N;
7685 }
7686 break;
7687 }
7688 case ISD::FMA:
7689 case ISD::FMAD: {
7690 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7691 break;
7692
7693 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7694 NegatibleCost CostZ = NegatibleCost::Expensive;
7695 SDValue NegZ =
7696 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7697 // Give up if fail to negate the Z.
7698 if (!NegZ)
7699 break;
7700
7701 // Prevent this node from being deleted by the next two calls.
7702 Handles.emplace_back(NegZ);
7703
7704 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7705 NegatibleCost CostX = NegatibleCost::Expensive;
7706 SDValue NegX =
7707 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7708 // Prevent this node from being deleted by the next call.
7709 if (NegX)
7710 Handles.emplace_back(NegX);
7711
7712 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7713 NegatibleCost CostY = NegatibleCost::Expensive;
7714 SDValue NegY =
7715 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7716
7717 // We're done with the handles.
7718 Handles.clear();
7719
7720 // Negate X if its cost is less than or equal to the cost of negating Y.
7721 if (NegX && (CostX <= CostY)) {
7722 Cost = std::min(CostX, CostZ);
7723 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7724 if (NegY != N)
7725 RemoveDeadNode(NegY);
7726 return N;
7727 }
7728
7729 // Negate Y if it is not expensive.
7730 if (NegY) {
7731 Cost = std::min(CostY, CostZ);
7732 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7733 if (NegX != N)
7734 RemoveDeadNode(NegX);
7735 return N;
7736 }
7737 break;
7738 }
7739
7740 case ISD::FP_EXTEND:
7741 case ISD::FSIN:
7742 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7743 OptForSize, Cost, Depth))
7744 return DAG.getNode(Opcode, DL, VT, NegV);
7745 break;
7746 case ISD::FP_ROUND:
7747 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7748 OptForSize, Cost, Depth))
7749 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7750 break;
7751 case ISD::SELECT:
7752 case ISD::VSELECT: {
7753 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7754 // iff at least one cost is cheaper and the other is neutral/cheaper
7755 SDValue LHS = Op.getOperand(1);
7756 NegatibleCost CostLHS = NegatibleCost::Expensive;
7757 SDValue NegLHS =
7758 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7759 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7760 RemoveDeadNode(NegLHS);
7761 break;
7762 }
7763
7764 // Prevent this node from being deleted by the next call.
7765 Handles.emplace_back(NegLHS);
7766
7767 SDValue RHS = Op.getOperand(2);
7768 NegatibleCost CostRHS = NegatibleCost::Expensive;
7769 SDValue NegRHS =
7770 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7771
7772 // We're done with the handles.
7773 Handles.clear();
7774
7775 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7776 (CostLHS != NegatibleCost::Cheaper &&
7777 CostRHS != NegatibleCost::Cheaper)) {
7778 RemoveDeadNode(NegLHS);
7779 RemoveDeadNode(NegRHS);
7780 break;
7781 }
7782
7783 Cost = std::min(CostLHS, CostRHS);
7784 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7785 }
7786 }
7787
7788 return SDValue();
7789}
7790
7791//===----------------------------------------------------------------------===//
7792// Legalization Utilities
7793//===----------------------------------------------------------------------===//
7794
7795bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7796 SDValue LHS, SDValue RHS,
7797 SmallVectorImpl<SDValue> &Result,
7798 EVT HiLoVT, SelectionDAG &DAG,
7799 MulExpansionKind Kind, SDValue LL,
7800 SDValue LH, SDValue RL, SDValue RH) const {
7801 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7802 Opcode == ISD::SMUL_LOHI);
7803
7804 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7805 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7806 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7807 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7808 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7809 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7810 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7811 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7812
7813 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7814 return false;
7815
7816 unsigned OuterBitSize = VT.getScalarSizeInBits();
7817 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7818
7819 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7820 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7821 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7822
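// Editorial note: the expansion below follows the schoolbook identity
//   (LH * 2^I + LL) * (RH * 2^I + RL)
//     = LH*RH * 2^(2I) + (LH*RL + LL*RH) * 2^I + LL*RL
// where I is InnerBitSize; the half-width partial products come from
// MakeMUL_LOHI and are recombined with explicit carry propagation.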
7823 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7824 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7825 bool Signed) -> bool {
7826 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7827 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7828 Hi = SDValue(Lo.getNode(), 1);
7829 return true;
7830 }
7831 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7832 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7833 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7834 return true;
7835 }
7836 return false;
7837 };
7838
7839 SDValue Lo, Hi;
7840
7841 if (!LL.getNode() && !RL.getNode() &&
7842 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7843 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7844 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7845 }
7846
7847 if (!LL.getNode())
7848 return false;
7849
7850 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7851 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7852 DAG.MaskedValueIsZero(RHS, HighMask)) {
7853 // The inputs are both zero-extended.
7854 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7855 Result.push_back(Lo);
7856 Result.push_back(Hi);
7857 if (Opcode != ISD::MUL) {
7858 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7859 Result.push_back(Zero);
7860 Result.push_back(Zero);
7861 }
7862 return true;
7863 }
7864 }
7865
7866 if (!VT.isVector() && Opcode == ISD::MUL &&
7867 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7868 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7869 // The input values are both sign-extended.
7870 // TODO non-MUL case?
7871 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7872 Result.push_back(Lo);
7873 Result.push_back(Hi);
7874 return true;
7875 }
7876 }
7877
7878 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7879 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7880
7881 if (!LH.getNode() && !RH.getNode() &&
7882 isOperationLegalOrCustom(ISD::SRL, VT) &&
7883 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7884 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7885 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7886 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7887 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7888 }
7889
7890 if (!LH.getNode())
7891 return false;
7892
7893 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7894 return false;
7895
7896 Result.push_back(Lo);
7897
7898 if (Opcode == ISD::MUL) {
7899 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7900 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7901 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7902 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7903 Result.push_back(Hi);
7904 return true;
7905 }
7906
7907 // Compute the full width result.
7908 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7909 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7910 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7911 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7912 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7913 };
7914
7915 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7916 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7917 return false;
7918
7919 // This is effectively the add part of a multiply-add of half-sized operands,
7920 // so it cannot overflow.
7921 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7922
7923 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7924 return false;
7925
7926 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7927 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7928
7929 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7930 isOperationLegalOrCustom(ISD::ADDE, VT));
7931 if (UseGlue)
7932 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7933 Merge(Lo, Hi));
7934 else
7935 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7936 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7937
7938 SDValue Carry = Next.getValue(1);
7939 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7940 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7941
7942 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7943 return false;
7944
7945 if (UseGlue)
7946 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7947 Carry);
7948 else
7949 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7950 Zero, Carry);
7951
7952 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7953
7954 if (Opcode == ISD::SMUL_LOHI) {
7955 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7956 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7957 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7958
7959 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7960 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7961 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7962 }
7963
7964 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7965 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7966 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7967 return true;
7968}
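// The expansion above is the schoolbook decomposition of a double-width
// multiply into half-width partial products. A minimal scalar sketch of the
// unsigned case, splitting 64-bit operands into the same LL/LH/RL/RH halves
// (illustrative only; mul64x64_lo_hi is a hypothetical helper, not part of
// this file):
//
//   uint64_t mul64x64_lo_hi(uint64_t L, uint64_t R, uint64_t &Hi) {
//     uint64_t LL = L & 0xFFFFFFFF, LH = L >> 32;
//     uint64_t RL = R & 0xFFFFFFFF, RH = R >> 32;
//     uint64_t P0 = LL * RL;                        // bits [0, 64)
//     uint64_t Mid = (P0 >> 32) + LL * RH;          // multiply-add, no carry lost
//     uint64_t Mid2 = (Mid & 0xFFFFFFFF) + LH * RL;
//     Hi = (Mid >> 32) + (Mid2 >> 32) + LH * RH;
//     return (Mid2 << 32) | (P0 & 0xFFFFFFFF);
//   }
//
// "(P0 >> 32) + LL * RH" is the add part of a multiply-add of half-sized
// operands noted in the code: its maximum is (2^32 - 1) + (2^32 - 1)^2, which
// is below 2^64, so the sum cannot overflow.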
7969
7970bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7971 SelectionDAG &DAG, MulExpansionKind Kind,
7972 SDValue LL, SDValue LH, SDValue RL,
7973 SDValue RH) const {
7974 SmallVector<SDValue, 2> Result;
7975 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7976 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7977 DAG, Kind, LL, LH, RL, RH);
7978 if (Ok) {
7979 assert(Result.size() == 2);
7980 Lo = Result[0];
7981 Hi = Result[1];
7982 }
7983 return Ok;
7984}
7985
7986// Optimize unsigned division or remainder by constants for types twice as large
7987// as a legal VT.
7988//
7989// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7990// can be computed
7991// as:
7992// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7993// Remainder = Sum % Constant
7994// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7995//
7996// For division, we can compute the remainder using the algorithm described
7997// above, then subtract it from the dividend to get an exact multiple of
7998// Constant. Multiplying that exact multiple by the multiplicative inverse of
7999// Constant modulo (1 << BitWidth) yields the quotient.
8000
8001// If Constant is even, we can shift right the dividend and the divisor by the
8002// number of trailing zeros in Constant before applying the remainder algorithm.
8003// If we're after the quotient, we can subtract this value from the shifted
8004// dividend and multiply by the multiplicative inverse of the shifted divisor.
8005// If we want the remainder, we shift the value left by the number of trailing
8006// zeros and add the bits that were shifted out of the dividend.
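// A worked instance of the remainder trick (an illustrative sketch; Lo, Hi,
// and X are hypothetical 16-bit halves and a 32-bit dividend, not names used
// below): with Constant = 3, (1 << 16) % 3 == 1, so X % 3 == (Hi + Lo) % 3
// for X == (Hi << 16) | Lo. The carry of Hi + Lo can be folded back in
// because 0xFFFF is itself a multiple of 3:
//
//   uint16_t Sum;
//   uint16_t Carry = __builtin_add_overflow(Lo, Hi, &Sum);
//   Sum += Carry;            // Sum <= 0xFFFE whenever Carry == 1: no overflow
//   uint16_t Rem = Sum % 3;  // == X % 3, computed in the narrow type
//   uint32_t Quot = (X - Rem) * 0xAAAAAAABu; // 3 * 0xAAAAAAAB == 1 (mod 2^32)
//
// X - Rem is an exact multiple of 3, so the multiply by the modular inverse
// recovers the quotient exactly.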
8007bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8008 SmallVectorImpl<SDValue> &Result,
8009 EVT HiLoVT, SelectionDAG &DAG,
8010 SDValue LL, SDValue LH) const {
8011 unsigned Opcode = N->getOpcode();
8012 EVT VT = N->getValueType(0);
8013
8014 // TODO: Support signed division/remainder.
8015 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8016 return false;
8017 assert(
8018 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8019 "Unexpected opcode");
8020
8021 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8022 if (!CN)
8023 return false;
8024
8025 APInt Divisor = CN->getAPIntValue();
8026 unsigned BitWidth = Divisor.getBitWidth();
8027 unsigned HBitWidth = BitWidth / 2;
8028 assert(VT.getScalarSizeInBits() == BitWidth &&
8029 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8030
8031 // Divisor needs to be less than (1 << HBitWidth).
8032 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8033 if (Divisor.uge(HalfMaxPlus1))
8034 return false;
8035
8036 // We depend on the UREM by constant optimization in DAGCombiner, which
8037 // requires a high-half multiply.
8038 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8039 !isOperationLegalOrCustom(ISD::MULHU, VT))
8040 return false;
8041
8042 // Don't expand if optimizing for size.
8043 if (DAG.shouldOptForSize())
8044 return false;
8045
8046 // Early out for 0 or 1 divisors.
8047 if (Divisor.ule(1))
8048 return false;
8049
8050 // If the divisor is even, shift it until it becomes odd.
8051 unsigned TrailingZeros = 0;
8052 if (!Divisor[0]) {
8053 TrailingZeros = Divisor.countr_zero();
8054 Divisor.lshrInPlace(TrailingZeros);
8055 }
8056
8057 SDLoc dl(N);
8058 SDValue Sum;
8059 SDValue PartialRem;
8060
8061 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8062 // then add in the carry.
8063 // TODO: If we can't split it in half, we might be able to split into 3 or
8064 // more pieces using a smaller bit width.
8065 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8066 assert(!LL == !LH && "Expected both input halves or no input halves!");
8067 if (!LL)
8068 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8069
8070 // Shift the input by the number of TrailingZeros in the divisor. The
8071 // shifted out bits will be added to the remainder later.
8072 if (TrailingZeros) {
8073 // Save the shifted off bits if we need the remainder.
8074 if (Opcode != ISD::UDIV) {
8075 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8076 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8077 DAG.getConstant(Mask, dl, HiLoVT));
8078 }
8079
8080 LL = DAG.getNode(
8081 ISD::OR, dl, HiLoVT,
8082 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8083 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8084 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8085 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8086 HiLoVT, dl)));
8087 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8088 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8089 }
8090
8091 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8092 EVT SetCCType =
8093 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8094 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8095 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8096 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8097 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8098 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8099 } else {
8100 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8101 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8102 // If the boolean for the target is 0 or 1, we can add the setcc result
8103 // directly.
8104 if (getBooleanContents(HiLoVT) ==
8105 TargetLoweringBase::ZeroOrOneBooleanContent)
8106 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8107 else
8108 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8109 DAG.getConstant(0, dl, HiLoVT));
8110 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8111 }
8112 }
8113
8114 // If we didn't find a sum, we can't do the expansion.
8115 if (!Sum)
8116 return false;
8117
8118 // Perform a HiLoVT urem on the Sum using truncated divisor.
8119 SDValue RemL =
8120 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8121 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8122 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8123
8124 if (Opcode != ISD::UREM) {
8125 // Subtract the remainder from the shifted dividend.
8126 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8127 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8128
8129 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8130
8131 // Multiply by the multiplicative inverse of the divisor modulo
8132 // (1 << BitWidth).
8133 APInt MulFactor = Divisor.multiplicativeInverse();
8134
8135 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8136 DAG.getConstant(MulFactor, dl, VT));
8137
8138 // Split the quotient into low and high parts.
8139 SDValue QuotL, QuotH;
8140 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8141 Result.push_back(QuotL);
8142 Result.push_back(QuotH);
8143 }
8144
8145 if (Opcode != ISD::UDIV) {
8146 // If we shifted the input, shift the remainder left and add the bits we
8147 // shifted off the input.
8148 if (TrailingZeros) {
8149 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8150 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8151 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8152 }
8153 Result.push_back(RemL);
8154 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8155 }
8156
8157 return true;
8158}
8159
8160// Check that (every element of) Z is undef or not an exact multiple of BW.
8161static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8162 return ISD::matchUnaryPredicate(
8163 Z,
8164 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8165 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8166}
8167
8168static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8169 EVT VT = Node->getValueType(0);
8170 SDValue ShX, ShY;
8171 SDValue ShAmt, InvShAmt;
8172 SDValue X = Node->getOperand(0);
8173 SDValue Y = Node->getOperand(1);
8174 SDValue Z = Node->getOperand(2);
8175 SDValue Mask = Node->getOperand(3);
8176 SDValue VL = Node->getOperand(4);
8177
8178 unsigned BW = VT.getScalarSizeInBits();
8179 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8180 SDLoc DL(SDValue(Node, 0));
8181
8182 EVT ShVT = Z.getValueType();
8183 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8184 // fshl: X << C | Y >> (BW - C)
8185 // fshr: X << (BW - C) | Y >> C
8186 // where C = Z % BW is not zero
8187 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8188 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8189 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8190 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8191 VL);
8192 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8193 VL);
8194 } else {
8195 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8196 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8197 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8198 if (isPowerOf2_32(BW)) {
8199 // Z % BW -> Z & (BW - 1)
8200 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8201 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8202 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8203 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8204 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8205 } else {
8206 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8207 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8208 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8209 }
8210
8211 SDValue One = DAG.getConstant(1, DL, ShVT);
8212 if (IsFSHL) {
8213 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8214 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8215 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8216 } else {
8217 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8218 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8219 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8220 }
8221 }
8222 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8223}
8224
8225SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8226 SelectionDAG &DAG) const {
8227 if (Node->isVPOpcode())
8228 return expandVPFunnelShift(Node, DAG);
8229
8230 EVT VT = Node->getValueType(0);
8231
8232 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8233 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8234 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8235 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8236 return SDValue();
8237
8238 SDValue X = Node->getOperand(0);
8239 SDValue Y = Node->getOperand(1);
8240 SDValue Z = Node->getOperand(2);
8241
8242 unsigned BW = VT.getScalarSizeInBits();
8243 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8244 SDLoc DL(SDValue(Node, 0));
8245
8246 EVT ShVT = Z.getValueType();
8247
8248 // If a funnel shift in the other direction is more supported, use it.
8249 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8250 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8251 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8252 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8253 // fshl X, Y, Z -> fshr X, Y, -Z
8254 // fshr X, Y, Z -> fshl X, Y, -Z
8255 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8256 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8257 } else {
8258 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8259 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8260 SDValue One = DAG.getConstant(1, DL, ShVT);
8261 if (IsFSHL) {
8262 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8263 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8264 } else {
8265 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8266 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8267 }
8268 Z = DAG.getNOT(DL, Z, ShVT);
8269 }
8270 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8271 }
8272
8273 SDValue ShX, ShY;
8274 SDValue ShAmt, InvShAmt;
8275 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8276 // fshl: X << C | Y >> (BW - C)
8277 // fshr: X << (BW - C) | Y >> C
8278 // where C = Z % BW is not zero
8279 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8280 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8281 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8282 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8283 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8284 } else {
8285 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8286 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8287 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8288 if (isPowerOf2_32(BW)) {
8289 // Z % BW -> Z & (BW - 1)
8290 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8291 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8292 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8293 } else {
8294 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8295 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8296 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8297 }
8298
8299 SDValue One = DAG.getConstant(1, DL, ShVT);
8300 if (IsFSHL) {
8301 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8302 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8303 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8304 } else {
8305 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8306 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8307 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8308 }
8309 }
8310 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8311}
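// For reference, the general expansion above in scalar C for BW == 32
// (illustrative sketch; fshl32 is a hypothetical helper):
//
//   uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
//     unsigned ShAmt = Z & 31;         // Z % BW
//     unsigned InvShAmt = ~Z & 31;     // (BW - 1) - (Z % BW)
//     return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
//   }
//
// Pre-shifting Y by one keeps both shift amounts strictly below BW, so the
// ShAmt == 0 case needs no special handling: the Y term contributes nothing
// and the result is just X.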
8312
8313// TODO: Merge with expandFunnelShift.
8314SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8315 SelectionDAG &DAG) const {
8316 EVT VT = Node->getValueType(0);
8317 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8318 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8319 SDValue Op0 = Node->getOperand(0);
8320 SDValue Op1 = Node->getOperand(1);
8321 SDLoc DL(SDValue(Node, 0));
8322
8323 EVT ShVT = Op1.getValueType();
8324 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8325
8326 // If a rotate in the other direction is more supported, use it.
8327 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8328 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8329 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8330 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8331 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8332 }
8333
8334 if (!AllowVectorOps && VT.isVector() &&
8335 (!isOperationLegalOrCustom(ISD::SRL, VT) ||
8336 !isOperationLegalOrCustom(ISD::SHL, VT) ||
8337 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8338 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8339 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8340 return SDValue();
8341
8342 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8343 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8344 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8345 SDValue ShVal;
8346 SDValue HsVal;
8347 if (isPowerOf2_32(EltSizeInBits)) {
8348 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8349 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8350 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8351 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8352 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8353 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8354 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8355 } else {
8356 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8357 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8358 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8359 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8360 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8361 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8362 SDValue One = DAG.getConstant(1, DL, ShVT);
8363 HsVal =
8364 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8365 }
8366 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8367}
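// Scalar picture of the power-of-two path above (illustrative sketch; rotl32
// is a hypothetical helper):
//
//   uint32_t rotl32(uint32_t X, uint32_t C) {
//     return (X << (C & 31)) | (X >> (-C & 31));
//   }
//
// Masking both amounts with (w - 1) keeps every shift in range; for C == 0
// both masked amounts are 0 and the two terms OR back to X.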
8368
8369void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8370 SelectionDAG &DAG) const {
8371 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8372 EVT VT = Node->getValueType(0);
8373 unsigned VTBits = VT.getScalarSizeInBits();
8374 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8375
8376 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8377 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8378 SDValue ShOpLo = Node->getOperand(0);
8379 SDValue ShOpHi = Node->getOperand(1);
8380 SDValue ShAmt = Node->getOperand(2);
8381 EVT ShAmtVT = ShAmt.getValueType();
8382 EVT ShAmtCCVT =
8383 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8384 SDLoc dl(Node);
8385
8386 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8387 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8388 // away during isel.
8389 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8390 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8391 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8392 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8393 : DAG.getConstant(0, dl, VT);
8394
8395 SDValue Tmp2, Tmp3;
8396 if (IsSHL) {
8397 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8398 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8399 } else {
8400 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8401 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8402 }
8403
8404 // If the shift amount is greater than or equal to the width of a part, we
8405 // don't use the result from the FSHL/FSHR. Insert a test and select the
8406 // appropriate values for large shift amounts.
8407 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8408 DAG.getConstant(VTBits, dl, ShAmtVT));
8409 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8410 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8411
8412 if (IsSHL) {
8413 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8414 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8415 } else {
8416 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8417 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8418 }
8419}
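// The structure above computes, for SHL_PARTS with 32-bit parts (illustrative
// sketch; Amt, LoOut, and HiOut are hypothetical names):
//
//   // (Hi:Lo) << Amt for 0 <= Amt < 64
//   if (Amt & 32) {                  // the shift spans a whole part
//     HiOut = Lo << (Amt & 31);      // Tmp3, selected by Cond
//     LoOut = 0;                     // Tmp1
//   } else {
//     HiOut = fshl(Hi, Lo, Amt);     // Tmp2: (Hi << Amt) | (Lo >> (32 - Amt)),
//                                    // well defined even for Amt == 0
//     LoOut = Lo << Amt;             // Tmp3
//   }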
8420
8421bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8422 SelectionDAG &DAG) const {
8423 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8424 SDValue Src = Node->getOperand(OpNo);
8425 EVT SrcVT = Src.getValueType();
8426 EVT DstVT = Node->getValueType(0);
8427 SDLoc dl(SDValue(Node, 0));
8428
8429 // FIXME: Only f32 to i64 conversions are supported.
8430 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8431 return false;
8432
8433 if (Node->isStrictFPOpcode())
8434 // When a NaN is converted to an integer a trap is allowed. We can't
8435 // use this expansion here because it would eliminate that trap. Other
8436 // traps are also allowed and cannot be eliminated. See
8437 // IEEE 754-2008 sec 5.8.
8438 return false;
8439
8440 // Expand f32 -> i64 conversion
8441 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8442 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8443 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8444 EVT IntVT = SrcVT.changeTypeToInteger();
8445 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8446
8447 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8448 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8449 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8450 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8451 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8452 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8453
8454 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8455
8456 SDValue ExponentBits = DAG.getNode(
8457 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8458 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8459 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8460
8461 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8462 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8463 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8464 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8465
8466 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8467 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8468 DAG.getConstant(0x00800000, dl, IntVT));
8469
8470 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8471
8472 R = DAG.getSelectCC(
8473 dl, Exponent, ExponentLoBit,
8474 DAG.getNode(ISD::SHL, dl, DstVT, R,
8475 DAG.getZExtOrTrunc(
8476 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8477 dl, IntShVT)),
8478 DAG.getNode(ISD::SRL, dl, DstVT, R,
8479 DAG.getZExtOrTrunc(
8480 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8481 dl, IntShVT)),
8482 ISD::SETGT);
8483
8484 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8485 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8486
8487 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8488 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8489 return true;
8490}
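// The same computation as scalar C, following compiler-rt's __fixsfdi
// (illustrative sketch; inputs outside the i64 range, including NaN and Inf,
// are undefined, just as for FP_TO_SINT):
//
//   int64_t fixsfdi(float F) {
//     uint32_t Bits = std::bit_cast<uint32_t>(F);
//     int32_t Exp = (int32_t)((Bits >> 23) & 0xFF) - 127;  // unbiased exponent
//     uint64_t Sign = (Bits & 0x80000000) ? ~0ull : 0ull;  // all-ones if negative
//     uint64_t R = (Bits & 0x007FFFFF) | 0x00800000;       // implicit integer bit
//     if (Exp < 0)
//       return 0;                                          // |F| < 1 truncates to 0
//     R = Exp > 23 ? R << (Exp - 23) : R >> (23 - Exp);    // align the mantissa
//     return (int64_t)((R ^ Sign) - Sign);                 // conditional negate
//   }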
8491
8492bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8493 SDValue &Chain,
8494 SelectionDAG &DAG) const {
8495 SDLoc dl(SDValue(Node, 0));
8496 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8497 SDValue Src = Node->getOperand(OpNo);
8498
8499 EVT SrcVT = Src.getValueType();
8500 EVT DstVT = Node->getValueType(0);
8501 EVT SetCCVT =
8502 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8503 EVT DstSetCCVT =
8504 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8505
8506 // Only expand vector types if we have the appropriate vector bit operations.
8507 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8508 ISD::FP_TO_SINT;
8509 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8510 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8511 return false;
8512
8513 // If the maximum float value is smaller than the signed integer range,
8514 // the destination signmask can't be represented by the float, so we can
8515 // just use FP_TO_SINT directly.
8516 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8517 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8518 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8519 if (APFloat::opOverflow &
8520 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8521 if (Node->isStrictFPOpcode()) {
8522 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8523 { Node->getOperand(0), Src });
8524 Chain = Result.getValue(1);
8525 } else
8526 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8527 return true;
8528 }
8529
8530 // Don't expand it if there isn't a cheap FSUB instruction.
8531 if (!isOperationLegalOrCustom(
8532 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8533 return false;
8534
8535 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8536 SDValue Sel;
8537
8538 if (Node->isStrictFPOpcode()) {
8539 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8540 Node->getOperand(0), /*IsSignaling*/ true);
8541 Chain = Sel.getValue(1);
8542 } else {
8543 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8544 }
8545
8546 bool Strict = Node->isStrictFPOpcode() ||
8547 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8548
8549 if (Strict) {
8550 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8551 // signmask then offset (the result of which should be fully representable).
8552 // Sel = Src < 0x8000000000000000
8553 // FltOfs = select Sel, 0, 0x8000000000000000
8554 // IntOfs = select Sel, 0, 0x8000000000000000
8555 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8556
8557 // TODO: Should any fast-math-flags be set for the FSUB?
8558 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8559 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8560 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8561 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8562 DAG.getConstant(0, dl, DstVT),
8563 DAG.getConstant(SignMask, dl, DstVT));
8564 SDValue SInt;
8565 if (Node->isStrictFPOpcode()) {
8566 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8567 { Chain, Src, FltOfs });
8568 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8569 { Val.getValue(1), Val });
8570 Chain = SInt.getValue(1);
8571 } else {
8572 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8573 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8574 }
8575 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8576 } else {
8577 // Expand based on maximum range of FP_TO_SINT:
8578 // True = fp_to_sint(Src)
8579 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8580 // Result = select (Src < 0x8000000000000000), True, False
8581
8582 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8583 // TODO: Should any fast-math-flags be set for the FSUB?
8584 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8585 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8586 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8587 DAG.getConstant(SignMask, dl, DstVT));
8588 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8589 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8590 }
8591 return true;
8592}
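// The non-strict select form in scalar terms, for f64 -> u64 with Cst == 2^63
// (illustrative sketch; f64_to_u64 is a hypothetical helper):
//
//   uint64_t f64_to_u64(double X) {
//     if (X < 0x1p63)                  // fits in the signed range
//       return (uint64_t)(int64_t)X;
//     // Fold 2^63 out of the operand, convert, then add 2^63 back. The add
//     // is the XOR above, since the converted value's top bit is known zero.
//     return (uint64_t)(int64_t)(X - 0x1p63) ^ 0x8000000000000000u;
//   }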
8593
8594bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8595 SDValue &Chain, SelectionDAG &DAG) const {
8596 // This transform is not correct for converting 0 when the rounding mode is
8597 // set to round toward negative infinity, which will produce -0.0. So disable
8598 // it under strictfp.
8599 if (Node->isStrictFPOpcode())
8600 return false;
8601
8602 SDValue Src = Node->getOperand(0);
8603 EVT SrcVT = Src.getValueType();
8604 EVT DstVT = Node->getValueType(0);
8605
8606 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8607 // it.
8608 if (Node->getFlags().hasNonNeg() &&
8609 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8610 Result =
8611 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8612 return true;
8613 }
8614
8615 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8616 return false;
8617
8618 // Only expand vector types if we have the appropriate vector bit
8619 // operations.
8620 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8621 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8622 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8623 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8624 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8625 return false;
8626
8627 SDLoc dl(SDValue(Node, 0));
8628
8629 // Implementation of unsigned i64 to f64 following the algorithm in
8630 // __floatundidf in compiler_rt. This implementation performs rounding
8631 // correctly in all rounding modes with the exception of converting 0
8632 // when rounding toward negative infinity. In that case the fsub will
8633 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8634 // incorrect.
8635 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8636 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8637 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8638 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8639 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8640 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8641
8642 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8643 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8644 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8645 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8646 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8647 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8648 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8649 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8650 return true;
8651}
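// Scalar form of the splicing trick (illustrative sketch; u64_to_f64 is a
// hypothetical helper): each 32-bit half is OR'ed into the mantissa of a
// power-of-two constant, so the FP subtract and add below perform the scaling
// and the single rounding step.
//
//   double u64_to_f64(uint64_t X) {
//     uint64_t LoBits = (X & 0xFFFFFFFF) | 0x4330000000000000; // 2^52 + lo
//     uint64_t HiBits = (X >> 32) | 0x4530000000000000;        // 2^84 + hi*2^32
//     double HiF = std::bit_cast<double>(HiBits) -
//                  std::bit_cast<double>(0x4530000000100000ull); // - (2^84 + 2^52)
//     return HiF + std::bit_cast<double>(LoBits); // (hi*2^32 - 2^52) + (2^52 + lo)
//   }
//
// The subtraction is exact, so the only rounding happens in the final FADD.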
8652
8653SDValue
8654TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8655 SelectionDAG &DAG) const {
8656 unsigned Opcode = Node->getOpcode();
8657 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8658 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8659 "Wrong opcode");
8660
8661 if (Node->getFlags().hasNoNaNs()) {
8662 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8663 EVT VT = Node->getValueType(0);
8664 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8665 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8666 VT.isVector())
8667 return SDValue();
8668 SDValue Op1 = Node->getOperand(0);
8669 SDValue Op2 = Node->getOperand(1);
8670 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8671 Node->getFlags());
8672 }
8673
8674 return SDValue();
8675}
8676
8677SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8678 SelectionDAG &DAG) const {
8679 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8680 return Expanded;
8681
8682 EVT VT = Node->getValueType(0);
8683 if (VT.isScalableVector())
8685 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8686
8687 SDLoc dl(Node);
8688 unsigned NewOp =
8689 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8690
8691 if (isOperationLegalOrCustom(NewOp, VT)) {
8692 SDValue Quiet0 = Node->getOperand(0);
8693 SDValue Quiet1 = Node->getOperand(1);
8694
8695 if (!Node->getFlags().hasNoNaNs()) {
8696 // Insert canonicalizes if it's possible we need to quiet to get correct
8697 // sNaN behavior.
8698 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8699 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8700 Node->getFlags());
8701 }
8702 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8703 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8704 Node->getFlags());
8705 }
8706 }
8707
8708 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8709 }
8710
8711 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8712 // instead if there are no NaNs and there can't be an incompatible zero
8713 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8714 if ((Node->getFlags().hasNoNaNs() ||
8715 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8716 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8717 (Node->getFlags().hasNoSignedZeros() ||
8718 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8719 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8720 unsigned IEEE2018Op =
8721 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8722 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8723 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8724 Node->getOperand(1), Node->getFlags());
8725 }
8726
8727 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8728 return SelCC;
8729
8730 return SDValue();
8731}
8732
8733SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8734 SelectionDAG &DAG) const {
8735 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8736 return Expanded;
8737
8738 SDLoc DL(N);
8739 SDValue LHS = N->getOperand(0);
8740 SDValue RHS = N->getOperand(1);
8741 unsigned Opc = N->getOpcode();
8742 EVT VT = N->getValueType(0);
8743 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8744 bool IsMax = Opc == ISD::FMAXIMUM;
8745 SDNodeFlags Flags = N->getFlags();
8746
8747 // First, implement comparison not propagating NaN. If no native fmin or fmax
8748 // available, use plain select with setcc instead.
8749 SDValue MinMax;
8750 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8751 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8752
8753 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8754 // signed zero behavior.
8755 bool MinMaxMustRespectOrderedZero = false;
8756
8757 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8758 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8759 MinMaxMustRespectOrderedZero = true;
8760 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8761 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8762 } else {
8763 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8764 return DAG.UnrollVectorOp(N);
8765
8766 // A NaN (if any) is propagated later, so orderedness doesn't matter here.
8767 SDValue Compare =
8768 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8769 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8770 }
8771
8772 // Propagate a NaN if either operand is one.
8773 if (!N->getFlags().hasNoNaNs() &&
8774 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8775 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8776 APFloat::getNaN(VT.getFltSemantics()));
8777 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8778 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8779 }
8780
8781 // fminimum/fmaximum requires -0.0 to compare less than +0.0.
8782 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8783 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8784 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8785 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8786 SDValue TestZero =
8787 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8788 SDValue LCmp = DAG.getSelect(
8789 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8790 MinMax, Flags);
8791 SDValue RCmp = DAG.getSelect(
8792 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8793 LCmp, Flags);
8794 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8795 }
8796
8797 return MinMax;
8798}
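// Scalar statement of the IEEE-754 2019 semantics implemented above
// (illustrative sketch; uses std::signbit and quiet_NaN from <cmath> and
// <limits>):
//
//   double fmaximum(double X, double Y) {
//     if (X != X || Y != Y)
//       return std::numeric_limits<double>::quiet_NaN(); // any NaN propagates
//     if (X == 0.0 && Y == 0.0)
//       return std::signbit(X) ? Y : X;                  // +0.0 beats -0.0
//     return X > Y ? X : Y;
//   }
//
// The expansion mirrors these three steps: an ordered min/max (native or
// setcc + select), a SETUO select for the NaN case, and an IS_FPCLASS-based
// fixup for signed zeros.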
8799
8800SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8801 SelectionDAG &DAG) const {
8802 SDLoc DL(Node);
8803 SDValue LHS = Node->getOperand(0);
8804 SDValue RHS = Node->getOperand(1);
8805 unsigned Opc = Node->getOpcode();
8806 EVT VT = Node->getValueType(0);
8807 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8808 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8809 const TargetOptions &Options = DAG.getTarget().Options;
8810 SDNodeFlags Flags = Node->getFlags();
8811
8812 unsigned NewOp =
8813 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8814
8815 if (isOperationLegalOrCustom(NewOp, VT)) {
8816 if (!Flags.hasNoNaNs()) {
8817 // Insert canonicalizes if it's possible we need to quiet to get correct
8818 // sNaN behavior.
8819 if (!DAG.isKnownNeverSNaN(LHS)) {
8820 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8821 }
8822 if (!DAG.isKnownNeverSNaN(RHS)) {
8823 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8824 }
8825 }
8826
8827 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8828 }
8829
8830 // We can use FMINIMUM/FMAXIMUM if there are no NaNs, since the behavior
8831 // is the same in all other cases, +0.0 vs -0.0 included.
8832 if (Flags.hasNoNaNs() ||
8833 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8834 unsigned IEEE2019Op =
8835 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8836 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8837 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8838 }
8839
8840 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
8841 // either one for +0.0 vs -0.0.
8842 if ((Flags.hasNoNaNs() ||
8843 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8844 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8845 DAG.isKnownNeverZeroFloat(RHS))) {
8846 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8847 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8848 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8849 }
8850
8851 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8852 return DAG.UnrollVectorOp(Node);
8853
8854 // If only one operand is NaN, override it with the other operand.
8855 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8856 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8857 }
8858 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8859 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8860 }
8861
8862 SDValue MinMax =
8863 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8864
8865 // TODO: We need quiet sNaN if strictfp.
8866
8867 // Fixup signed zero behavior.
8868 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8869 DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
8870 return MinMax;
8871 }
8872 SDValue TestZero =
8873 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8874 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8875 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8876 SDValue LCmp = DAG.getSelect(
8877 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8878 MinMax, Flags);
8879 SDValue RCmp = DAG.getSelect(
8880 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8881 Flags);
8882 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8883}
8884
8885/// Returns a true value if this FPClassTest can be performed with an ordered
8886/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8887/// std::nullopt if it cannot be performed as a compare with 0.
8888static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8889 const fltSemantics &Semantics,
8890 const MachineFunction &MF) {
8891 FPClassTest OrderedMask = Test & ~fcNan;
8892 FPClassTest NanTest = Test & fcNan;
8893 bool IsOrdered = NanTest == fcNone;
8894 bool IsUnordered = NanTest == fcNan;
8895
8896 // Skip cases that are testing for only a qnan or snan.
8897 if (!IsOrdered && !IsUnordered)
8898 return std::nullopt;
8899
8900 if (OrderedMask == fcZero &&
8901 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8902 return IsOrdered;
8903 if (OrderedMask == (fcZero | fcSubnormal) &&
8904 MF.getDenormalMode(Semantics).inputsAreZero())
8905 return IsOrdered;
8906 return std::nullopt;
8907}
8908
8909SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8910 const FPClassTest OrigTestMask,
8911 SDNodeFlags Flags, const SDLoc &DL,
8912 SelectionDAG &DAG) const {
8913 EVT OperandVT = Op.getValueType();
8914 assert(OperandVT.isFloatingPoint());
8915 FPClassTest Test = OrigTestMask;
8916
8917 // Degenerate cases.
8918 if (Test == fcNone)
8919 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8920 if (Test == fcAllFlags)
8921 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8922
8923 // PPC double double is a pair of doubles, of which the higher part determines
8924 // the value class.
8925 if (OperandVT == MVT::ppcf128) {
8926 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8927 DAG.getConstant(1, DL, MVT::i32));
8928 OperandVT = MVT::f64;
8929 }
8930
8931 // Floating-point type properties.
8932 EVT ScalarFloatVT = OperandVT.getScalarType();
8933 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8934 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8935 bool IsF80 = (ScalarFloatVT == MVT::f80);
8936
8937 // Some checks can be implemented using float comparisons, if floating point
8938 // exceptions are ignored.
8939 if (Flags.hasNoFPExcept() &&
8940 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8941 FPClassTest FPTestMask = Test;
8942 bool IsInvertedFP = false;
8943
8944 if (FPClassTest InvertedFPCheck =
8945 invertFPClassTestIfSimpler(FPTestMask, true)) {
8946 FPTestMask = InvertedFPCheck;
8947 IsInvertedFP = true;
8948 }
8949
8950 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8951 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8952
8953 // See if we can fold an '| fcNan' into an unordered compare.
8954 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8955
8956 // Can't fold the ordered check if we're only testing for snan or qnan
8957 // individually.
8958 if ((FPTestMask & fcNan) != fcNan)
8959 OrderedFPTestMask = FPTestMask;
8960
8961 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8962
8963 if (std::optional<bool> IsCmp0 =
8964 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8965 IsCmp0 && (isCondCodeLegalOrCustom(
8966 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8967 OperandVT.getScalarType().getSimpleVT()))) {
8968
8969 // If denormals could be implicitly treated as 0, this is not equivalent
8970 // to a compare with 0 since it will also be true for denormals.
8971 return DAG.getSetCC(DL, ResultVT, Op,
8972 DAG.getConstantFP(0.0, DL, OperandVT),
8973 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8974 }
8975
8976 if (FPTestMask == fcNan &&
8977 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8978 OperandVT.getScalarType().getSimpleVT()))
8979 return DAG.getSetCC(DL, ResultVT, Op, Op,
8980 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8981
8982 bool IsOrderedInf = FPTestMask == fcInf;
8983 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8984 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8985 : UnorderedCmpOpcode,
8986 OperandVT.getScalarType().getSimpleVT()) &&
8987 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8988 (isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
8989 (OperandVT.isVector() &&
8990 isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
8991 // isinf(x) --> fabs(x) == inf
8992 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8993 SDValue Inf =
8994 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8995 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8996 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8997 }
8998
8999 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9000 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9001 : UnorderedCmpOpcode,
9002 OperandVT.getSimpleVT())) {
9003 // isposinf(x) --> x == inf
9004 // isneginf(x) --> x == -inf
9005 // isposinf(x) || nan --> x u== inf
9006 // isneginf(x) || nan --> x u== -inf
9007
9008 SDValue Inf = DAG.getConstantFP(
9009 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9010 OperandVT);
9011 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9012 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9013 }
9014
9015 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9016 // TODO: Could handle ordered case, but it produces worse code for
9017 // x86. Maybe handle ordered if fabs is free?
9018
9019 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9020 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9021
9022 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9023 OperandVT.getScalarType().getSimpleVT())) {
9024 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9025
9026 // TODO: Maybe only makes sense if fabs is free. Integer test of
9027 // exponent bits seems better for x86.
9028 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9029 SDValue SmallestNormal = DAG.getConstantFP(
9030 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9031 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9032 IsOrdered ? OrderedOp : UnorderedOp);
9033 }
9034 }
9035
9036 if (FPTestMask == fcNormal) {
9037 // TODO: Handle unordered
9038 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9039 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9040
9041 if (isCondCodeLegalOrCustom(IsFiniteOp,
9042 OperandVT.getScalarType().getSimpleVT()) &&
9043 isCondCodeLegalOrCustom(IsNormalOp,
9044 OperandVT.getScalarType().getSimpleVT()) &&
9045 isFAbsFree(OperandVT)) {
9046 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9047 SDValue Inf =
9048 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9049 SDValue SmallestNormal = DAG.getConstantFP(
9050 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9051
9052 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9053 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9054 SDValue IsNormal =
9055 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9056 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9057 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9058 }
9059 }
9060 }
9061
9062 // Some checks may be represented as inversion of simpler check, for example
9063 // "inf|normal|subnormal|zero" => !"nan".
9064 bool IsInverted = false;
9065
9066 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9067 Test = InvertedCheck;
9068 IsInverted = true;
9069 }
9070
9071 // In the general case use integer operations.
9072 unsigned BitSize = OperandVT.getScalarSizeInBits();
9073 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9074 if (OperandVT.isVector())
9075 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9076 OperandVT.getVectorElementCount());
9077 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9078
9079 // Various masks.
9080 APInt SignBit = APInt::getSignMask(BitSize);
9081 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9082 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9083 const unsigned ExplicitIntBitInF80 = 63;
9084 APInt ExpMask = Inf;
9085 if (IsF80)
9086 ExpMask.clearBit(ExplicitIntBitInF80);
9087 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9088 APInt QNaNBitMask =
9089 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9090 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9091
9092 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9093 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9094 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9095 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9096 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9097 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9098
9099 SDValue Res;
9100 const auto appendResult = [&](SDValue PartialRes) {
9101 if (PartialRes) {
9102 if (Res)
9103 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9104 else
9105 Res = PartialRes;
9106 }
9107 };
9108
9109 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9110 const auto getIntBitIsSet = [&]() -> SDValue {
9111 if (!IntBitIsSetV) {
9112 APInt IntBitMask(BitSize, 0);
9113 IntBitMask.setBit(ExplicitIntBitInF80);
9114 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9115 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9116 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9117 }
9118 return IntBitIsSetV;
9119 };
9120
9121 // Split the value into sign bit and absolute value.
9122 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9123 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9124 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9125
9126 // Tests that involve more than one class should be processed first.
9127 SDValue PartialRes;
9128
9129 if (IsF80)
9130 ; // Detect finite numbers of f80 by checking individual classes because
9131 // they have different settings of the explicit integer bit.
9132 else if ((Test & fcFinite) == fcFinite) {
9133 // finite(V) ==> abs(V) < exp_mask
9134 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9135 Test &= ~fcFinite;
9136 } else if ((Test & fcFinite) == fcPosFinite) {
9137 // finite(V) && V > 0 ==> V < exp_mask
9138 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9139 Test &= ~fcPosFinite;
9140 } else if ((Test & fcFinite) == fcNegFinite) {
9141 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9142 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9143 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9144 Test &= ~fcNegFinite;
9145 }
9146 appendResult(PartialRes);
9147
9148 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9149 // fcZero | fcSubnormal => test all exponent bits are 0
9150 // TODO: Handle sign bit specific cases
9151 if (PartialCheck == (fcZero | fcSubnormal)) {
9152 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9153 SDValue ExpIsZero =
9154 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9155 appendResult(ExpIsZero);
9156 Test &= ~PartialCheck & fcAllFlags;
9157 }
9158 }
9159
9160 // Check for individual classes.
9161
9162 if (unsigned PartialCheck = Test & fcZero) {
9163 if (PartialCheck == fcPosZero)
9164 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9165 else if (PartialCheck == fcZero)
9166 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9167 else // ISD::fcNegZero
9168 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9169 appendResult(PartialRes);
9170 }
9171
9172 if (unsigned PartialCheck = Test & fcSubnormal) {
9173 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9174 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9175 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9176 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9177 SDValue VMinusOneV =
9178 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9179 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9180 if (PartialCheck == fcNegSubnormal)
9181 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9182 appendResult(PartialRes);
9183 }
9184
9185 if (unsigned PartialCheck = Test & fcInf) {
9186 if (PartialCheck == fcPosInf)
9187 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9188 else if (PartialCheck == fcInf)
9189 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9190 else { // ISD::fcNegInf
9191 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9192 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9193 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9194 }
9195 appendResult(PartialRes);
9196 }
9197
9198 if (unsigned PartialCheck = Test & fcNan) {
9199 APInt InfWithQnanBit = Inf | QNaNBitMask;
9200 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9201 if (PartialCheck == fcNan) {
9202 // isnan(V) ==> abs(V) > int(inf)
9203 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9204 if (IsF80) {
9205 // Recognize unsupported values as NaNs for compatibility with glibc.
9206 // For such values, (exp(V) == 0) == int_bit.
9207 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9208 SDValue ExpIsZero =
9209 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9210 SDValue IsPseudo =
9211 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9212 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9213 }
9214 } else if (PartialCheck == fcQNan) {
9215 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9216 PartialRes =
9217 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9218 } else { // ISD::fcSNan
9219 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9220 // abs(V) < (unsigned(Inf) | quiet_bit)
9221 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9222 SDValue IsNotQnan =
9223 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9224 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9225 }
9226 appendResult(PartialRes);
9227 }
9228
9229 if (unsigned PartialCheck = Test & fcNormal) {
9230 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9231 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9232 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9233 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9234 APInt ExpLimit = ExpMask - ExpLSB;
9235 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9236 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9237 if (PartialCheck == fcNegNormal)
9238 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9239 else if (PartialCheck == fcPosNormal) {
9240 SDValue PosSignV =
9241 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9242 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9243 }
9244 if (IsF80)
9245 PartialRes =
9246 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9247 appendResult(PartialRes);
9248 }
9249
9250 if (!Res)
9251 return DAG.getConstant(IsInverted, DL, ResultVT);
9252 if (IsInverted)
9253 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9254 return Res;
9255}
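// For IEEE binary32 the integer tests above reduce to the following
// (illustrative summary):
//
//   uint32_t Abs = Bits & 0x7FFFFFFF;             // drop the sign bit
//   iszero:       Abs == 0x00000000
//   issubnormal:  Abs - 1 < 0x007FFFFF            // unsigned compare
//   isnormal:     Abs - 0x00800000 < 0x7F000000   // unsigned compare
//   isinf:        Abs == 0x7F800000
//   isnan:        Abs >  0x7F800000
//   is_qnan:      Abs >= 0x7FC00000               // quiet bit set
//   is_snan:      0x7F800000 < Abs && Abs < 0x7FC00000
//
// so every class is one unsigned compare against a constant, plus a sign-bit
// test for the fcPos*/fcNeg* variants.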
9256
9257// Only expand vector types if we have the appropriate vector bit operations.
9258static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9259 assert(VT.isVector() && "Expected vector type");
9260 unsigned Len = VT.getScalarSizeInBits();
9261 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9262 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9263 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9264 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9265 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9266}
9267
9268SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9269 SDLoc dl(Node);
9270 EVT VT = Node->getValueType(0);
9271 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9272 SDValue Op = Node->getOperand(0);
9273 unsigned Len = VT.getScalarSizeInBits();
9274 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9275
9276 // TODO: Add support for irregular type lengths.
9277 if (!(Len <= 128 && Len % 8 == 0))
9278 return SDValue();
9279
9280 // Only expand vector types if we have the appropriate vector bit operations.
9281 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9282 return SDValue();
9283
9284 // This is the "best" algorithm from
9285 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9286 SDValue Mask55 =
9287 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9288 SDValue Mask33 =
9289 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9290 SDValue Mask0F =
9291 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9292
9293 // v = v - ((v >> 1) & 0x55555555...)
9294 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9295 DAG.getNode(ISD::AND, dl, VT,
9296 DAG.getNode(ISD::SRL, dl, VT, Op,
9297 DAG.getConstant(1, dl, ShVT)),
9298 Mask55));
9299 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9300 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9301 DAG.getNode(ISD::AND, dl, VT,
9302 DAG.getNode(ISD::SRL, dl, VT, Op,
9303 DAG.getConstant(2, dl, ShVT)),
9304 Mask33));
9305 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9306 Op = DAG.getNode(ISD::AND, dl, VT,
9307 DAG.getNode(ISD::ADD, dl, VT, Op,
9308 DAG.getNode(ISD::SRL, dl, VT, Op,
9309 DAG.getConstant(4, dl, ShVT))),
9310 Mask0F);
9311
9312 if (Len <= 8)
9313 return Op;
9314
9315 // Avoid the multiply if we only have 2 bytes to add.
9316 // TODO: Only doing this for scalars because vectors weren't as obviously
9317 // improved.
9318 if (Len == 16 && !VT.isVector()) {
9319 // v = (v + (v >> 8)) & 0x00FF;
9320 return DAG.getNode(ISD::AND, dl, VT,
9321 DAG.getNode(ISD::ADD, dl, VT, Op,
9322 DAG.getNode(ISD::SRL, dl, VT, Op,
9323 DAG.getConstant(8, dl, ShVT))),
9324 DAG.getConstant(0xFF, dl, VT));
9325 }
9326
9327 // v = (v * 0x01010101...) >> (Len - 8)
9328 SDValue V;
9329 if (isOperationLegalOrCustomOrPromote(
9330 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9331 SDValue Mask01 =
9332 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9333 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9334 } else {
9335 V = Op;
9336 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9337 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9338 V = DAG.getNode(ISD::ADD, dl, VT, V,
9339 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9340 }
9341 }
9342 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9343}
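// The same bithack as a standalone function (illustrative sketch; popcount32
// is a hypothetical helper):
//
//   uint32_t popcount32(uint32_t V) {
//     V = V - ((V >> 1) & 0x55555555);                // 16 x 2-bit sums
//     V = (V & 0x33333333) + ((V >> 2) & 0x33333333); // 8 x 4-bit sums
//     V = (V + (V >> 4)) & 0x0F0F0F0F;                // 4 x 8-bit sums
//     return (V * 0x01010101) >> 24;  // add the four bytes into the top byte
//   }
//
// e.g. popcount32(0xF0F0F0F0) == 16.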
9344
9345SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9346 SDLoc dl(Node);
9347 EVT VT = Node->getValueType(0);
9348 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9349 SDValue Op = Node->getOperand(0);
9350 SDValue Mask = Node->getOperand(1);
9351 SDValue VL = Node->getOperand(2);
9352 unsigned Len = VT.getScalarSizeInBits();
9353 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9354
9355 // TODO: Add support for irregular type lengths.
9356 if (!(Len <= 128 && Len % 8 == 0))
9357 return SDValue();
9358
9359 // This is the same algorithm as in expandCTPOP, from
9360 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9361 SDValue Mask55 =
9362 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9363 SDValue Mask33 =
9364 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9365 SDValue Mask0F =
9366 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9367
9368 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9369
9370 // v = v - ((v >> 1) & 0x55555555...)
9371 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9372 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9373 DAG.getConstant(1, dl, ShVT), Mask, VL),
9374 Mask55, Mask, VL);
9375 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9376
9377 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9378 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9379 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9380 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9381 DAG.getConstant(2, dl, ShVT), Mask, VL),
9382 Mask33, Mask, VL);
9383 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9384
9385 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9386 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9387 Mask, VL);
9388 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9389 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9390
9391 if (Len <= 8)
9392 return Op;
9393
9394 // v = (v * 0x01010101...) >> (Len - 8)
9395 SDValue V;
9396 if (isOperationLegalOrCustomOrPromote(
9397 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9398 SDValue Mask01 =
9399 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9400 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9401 } else {
9402 V = Op;
9403 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9404 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9405 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9406 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9407 Mask, VL);
9408 }
9409 }
9410 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9411 Mask, VL);
9412}
9413
9414 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9415 SDLoc dl(Node);
9416 EVT VT = Node->getValueType(0);
9417 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9418 SDValue Op = Node->getOperand(0);
9419 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9420
9421 // If the non-ZERO_UNDEF version is supported we can use that instead.
9422 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9423 isOperationLegalOrCustom(ISD::CTLZ, VT))
9424 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9425
9426 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9427 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9428 EVT SetCCVT =
9429 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9430 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9431 SDValue Zero = DAG.getConstant(0, dl, VT);
9432 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9433 return DAG.getSelect(dl, VT, SrcIsZero,
9434 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9435 }
9436
9437 // Only expand vector types if we have the appropriate vector bit operations.
9438 // This includes the operations needed to expand CTPOP if it isn't supported.
9439 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9440 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9441 !canExpandVectorCTPOP(*this, VT)) ||
9442 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9443 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9444 return SDValue();
9445
9446 // for now, we do this:
9447 // x = x | (x >> 1);
9448 // x = x | (x >> 2);
9449 // ...
9450 // x = x | (x >>16);
9451 // x = x | (x >>32); // for 64-bit input
9452 // return popcount(~x);
9453 //
9454 // Ref: "Hacker's Delight" by Henry Warren
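// Illustrative trace for i8 x = 0b00010000 (three leading zeros):
//   x |= (x >> 1) -> 0b00011000
//   x |= (x >> 2) -> 0b00011110
//   x |= (x >> 4) -> 0b00011111
//   popcount(~x) == popcount(0b11100000) == 3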
9455 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9456 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9457 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9458 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9459 }
9460 Op = DAG.getNOT(dl, Op, VT);
9461 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9462}
9463
9464 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9465 SDLoc dl(Node);
9466 EVT VT = Node->getValueType(0);
9467 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9468 SDValue Op = Node->getOperand(0);
9469 SDValue Mask = Node->getOperand(1);
9470 SDValue VL = Node->getOperand(2);
9471 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9472
9473 // do this:
9474 // x = x | (x >> 1);
9475 // x = x | (x >> 2);
9476 // ...
9477 // x = x | (x >>16);
9478 // x = x | (x >>32); // for 64-bit input
9479 // return popcount(~x);
9480 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9481 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9482 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9483 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9484 VL);
9485 }
9486 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9487 Mask, VL);
9488 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9489}
9490
9491 SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9492 const SDLoc &DL, EVT VT, SDValue Op,
9493 unsigned BitWidth) const {
9494 if (BitWidth != 32 && BitWidth != 64)
9495 return SDValue();
9496 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9497 : APInt(64, 0x0218A392CD3D5DBFULL);
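// (Op & -Op) isolates the lowest set bit, i.e. 1 << cttz(Op). Multiplying
// the De Bruijn constant by that power of two shifts it left by cttz(Op),
// so the top Log2_32(BitWidth) bits form a pattern that is unique per
// trailing-zero count and can index the small lookup table built below.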
9498 const DataLayout &TD = DAG.getDataLayout();
9499 MachinePointerInfo PtrInfo =
9500 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9501 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9502 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9503 SDValue Lookup = DAG.getNode(
9504 ISD::SRL, DL, VT,
9505 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9506 DAG.getConstant(DeBruijn, DL, VT)),
9507 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9508 Lookup = DAG.getZExtOrTrunc(Lookup, DL, getPointerTy(TD));
9509
9510 SmallVector<uint8_t> Table(BitWidth, 0);
9511 for (unsigned i = 0; i < BitWidth; i++) {
9512 APInt Shl = DeBruijn.shl(i);
9513 APInt Lshr = Shl.lshr(ShiftAmt);
9514 Table[Lshr.getZExtValue()] = i;
9515 }
9516
9517 // Create a ConstantArray in Constant Pool
9518 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9519 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9520 TD.getPrefTypeAlign(CA->getType()));
9521 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9522 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9523 PtrInfo, MVT::i8);
9524 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9525 return ExtLoad;
9526
9527 EVT SetCCVT =
9528 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9529 SDValue Zero = DAG.getConstant(0, DL, VT);
9530 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9531 return DAG.getSelect(DL, VT, SrcIsZero,
9532 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9533}
9534
9535 SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9536 SDLoc dl(Node);
9537 EVT VT = Node->getValueType(0);
9538 SDValue Op = Node->getOperand(0);
9539 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9540
9541 // If the non-ZERO_UNDEF version is supported we can use that instead.
9542 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9543 isOperationLegalOrCustom(ISD::CTTZ, VT))
9544 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9545
9546 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9547 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9548 EVT SetCCVT =
9549 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9550 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9551 SDValue Zero = DAG.getConstant(0, dl, VT);
9552 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9553 return DAG.getSelect(dl, VT, SrcIsZero,
9554 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9555 }
9556
9557 // Only expand vector types if we have the appropriate vector bit operations.
9558 // This includes the operations needed to expand CTPOP if it isn't supported.
9559 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9560 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9561 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9562 !canExpandVectorCTPOP(*this, VT)) ||
9563 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9564 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9565 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9566 return SDValue();
9567
9568 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9569 // to be expanded or converted to a libcall.
9570 if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
9571 !isOperationLegal(ISD::CTLZ, VT))
9572 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9573 return V;
9574
9575 // for now, we use: { return popcount(~x & (x - 1)); }
9576 // unless the target has ctlz but not ctpop, in which case we use:
9577 // { return 32 - nlz(~x & (x-1)); }
9578 // Ref: "Hacker's Delight" by Henry Warren
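// Illustrative trace for i8 x = 0b01101000 (three trailing zeros):
//   x - 1        = 0b01100111
//   ~x           = 0b10010111
//   ~x & (x - 1) = 0b00000111, whose population count is 3.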
9579 SDValue Tmp = DAG.getNode(
9580 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9581 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9582
9583 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9584 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9585 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9586 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9587 }
9588
9589 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9590}
9591
9592 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9593 SDValue Op = Node->getOperand(0);
9594 SDValue Mask = Node->getOperand(1);
9595 SDValue VL = Node->getOperand(2);
9596 SDLoc dl(Node);
9597 EVT VT = Node->getValueType(0);
9598
9599 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9600 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9601 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9602 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9603 DAG.getConstant(1, dl, VT), Mask, VL);
9604 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9605 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9606}
9607
9608 SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9609 SelectionDAG &DAG) const {
9610 // %cond = to_bool_vec %source
9611 // %splat = splat /*val=*/VL
9612 // %tz = step_vector
9613 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9614 // %r = vp.reduce.umin %v
9615 SDLoc DL(N);
9616 SDValue Source = N->getOperand(0);
9617 SDValue Mask = N->getOperand(1);
9618 SDValue EVL = N->getOperand(2);
9619 EVT SrcVT = Source.getValueType();
9620 EVT ResVT = N->getValueType(0);
9621 EVT ResVecVT =
9622 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9623
9624 // Convert to boolean vector.
9625 if (SrcVT.getScalarType() != MVT::i1) {
9626 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9627 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9628 SrcVT.getVectorElementCount());
9629 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9630 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9631 }
9632
9633 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9634 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9635 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9636 SDValue Select =
9637 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9638 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9639}
9640
9641 SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9642 SelectionDAG &DAG) const {
9643 SDLoc DL(N);
9644 SDValue Mask = N->getOperand(0);
9645 EVT MaskVT = Mask.getValueType();
9646 EVT BoolVT = MaskVT.getScalarType();
9647
9648 // Find a suitable type for a stepvector.
9649 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9650 if (MaskVT.isScalableVector())
9651 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9652 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9653 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9654 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9655 /*ZeroIsPoison=*/true, &VScaleRange);
9656 EVT StepVT = MVT::getIntegerVT(EltWidth);
9657 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9658
9659 // If promotion is required to make the type legal, do it here; promotion
9660 // of integers within LegalizeVectorOps is looking for types of the same
9661 // size but with a smaller number of larger elements, not the usual larger
9662 // size with the same number of larger elements.
9663 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9664 TargetLowering::TypePromoteInteger) {
9665 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9666 StepVT = StepVecVT.getVectorElementType();
9667 }
9668
9669 // Zero out lanes with inactive elements, then find the highest remaining
9670 // value from the stepvector.
9671 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9672 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9673 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9674 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9675 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9676}
9677
9678 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9679 bool IsNegative) const {
9680 SDLoc dl(N);
9681 EVT VT = N->getValueType(0);
9682 SDValue Op = N->getOperand(0);
9683
9684 // abs(x) -> smax(x,sub(0,x))
9685 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9686 isOperationLegal(ISD::SMAX, VT)) {
9687 SDValue Zero = DAG.getConstant(0, dl, VT);
9688 Op = DAG.getFreeze(Op);
9689 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9690 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9691 }
9692
9693 // abs(x) -> umin(x,sub(0,x))
9694 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9695 isOperationLegal(ISD::UMIN, VT)) {
9696 SDValue Zero = DAG.getConstant(0, dl, VT);
9697 Op = DAG.getFreeze(Op);
9698 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9699 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9700 }
9701
9702 // 0 - abs(x) -> smin(x, sub(0,x))
9703 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9704 isOperationLegal(ISD::SMIN, VT)) {
9705 SDValue Zero = DAG.getConstant(0, dl, VT);
9706 Op = DAG.getFreeze(Op);
9707 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9708 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9709 }
9710
9711 // Only expand vector types if we have the appropriate vector operations.
9712 if (VT.isVector() &&
9713 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9714 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9715 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9716 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9717 return SDValue();
9718
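// Fallback: Shift is 0 for non-negative x and all-ones for negative x, so
// the XOR conditionally inverts the bits and the SUB supplies the +1 of
// two's complement negation. E.g. for i8 x = -5: Shift = -1,
// Xor = x ^ -1 = 4, and Xor - Shift = 4 - (-1) = 5.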
9719 Op = DAG.getFreeze(Op);
9720 SDValue Shift = DAG.getNode(
9721 ISD::SRA, dl, VT, Op,
9722 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9723 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9724
9725 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9726 if (!IsNegative)
9727 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9728
9729 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9730 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9731}
9732
9733 SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9734 SDLoc dl(N);
9735 EVT VT = N->getValueType(0);
9736 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9737 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9738 bool IsSigned = N->getOpcode() == ISD::ABDS;
9739
9740 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9741 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9742 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9743 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9744 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9745 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9746 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9747 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9748 }
9749
9750 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9751 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9752 return DAG.getNode(ISD::OR, dl, VT,
9753 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9754 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9755
9756 // If the subtract doesn't overflow then just use abs(sub())
9757 // NOTE: don't use frozen operands for value tracking.
9758 bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9759 DAG.SignBitIsZero(N->getOperand(0));
9760
9761 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
9762 N->getOperand(1)))
9763 return DAG.getNode(ISD::ABS, dl, VT,
9764 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9765
9766 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
9767 N->getOperand(0)))
9768 return DAG.getNode(ISD::ABS, dl, VT,
9769 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9770
9771 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9772 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9773 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9774
9775 // Branchless expansion iff cmp result is allbits:
9776 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9777 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
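// This works because Cmp is either zero or all-ones: when lhs > rhs,
// sub(-1, xor(-1, Diff)) == sub(-1, ~Diff) == Diff, and otherwise
// sub(0, xor(0, Diff)) negates the non-positive difference.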
9778 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9779 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9780 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9781 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9782 }
9783
9784 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9785 // flag if the (scalar) type is illegal as this is more likely to legalize
9786 // cleanly:
9787 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9788 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9789 SDValue USubO =
9790 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9791 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9792 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9793 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9794 }
9795
9796 // FIXME: Should really try to split the vector in case it's legal on a
9797 // subvector.
9798 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9799 return DAG.UnrollVectorOp(N);
9800
9801 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9802 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9803 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9804 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9805}
9806
9807 SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9808 SDLoc dl(N);
9809 EVT VT = N->getValueType(0);
9810 SDValue LHS = N->getOperand(0);
9811 SDValue RHS = N->getOperand(1);
9812
9813 unsigned Opc = N->getOpcode();
9814 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9815 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9816 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9817 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9818 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9819 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9820 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9821 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9822 "Unknown AVG node");
9823
9824 // If the operands are already extended, we can add+shift.
9825 bool IsExt =
9826 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9827 DAG.ComputeNumSignBits(RHS) >= 2) ||
9828 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9829 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9830 if (IsExt) {
9831 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9832 if (!IsFloor)
9833 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9834 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9835 DAG.getShiftAmountConstant(1, VT, dl));
9836 }
9837
9838 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9839 if (VT.isScalarInteger()) {
9840 unsigned BW = VT.getScalarSizeInBits();
9841 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9842 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9843 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9844 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9845 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9846 if (!IsFloor)
9847 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9848 DAG.getConstant(1, dl, ExtVT));
9849 // Just use SRL as we will be truncating away the extended sign bits.
9850 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9851 DAG.getShiftAmountConstant(1, ExtVT, dl));
9852 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9853 }
9854 }
9855
9856 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9857 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9858 SDValue UAddWithOverflow =
9859 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9860
9861 SDValue Sum = UAddWithOverflow.getValue(0);
9862 SDValue Overflow = UAddWithOverflow.getValue(1);
9863
9864 // Right shift the sum by 1
9865 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9866 DAG.getShiftAmountConstant(1, VT, dl));
9867
9868 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9869 SDValue OverflowShl = DAG.getNode(
9870 ISD::SHL, dl, VT, ZeroExtOverflow,
9871 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9872
9873 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9874 }
9875
9876 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9877 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9878 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9879 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
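// These hold because and(lhs,rhs) keeps the bits the operands share while
// xor(lhs,rhs) >> 1 contributes half of the differing bits, so no addition
// can overflow. E.g. avgflooru(5, 6) = (5 & 6) + ((5 ^ 6) >> 1) = 4 + 1 = 5
// and avgceilu(5, 6) = (5 | 6) - ((5 ^ 6) >> 1) = 7 - 1 = 6.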
9880 LHS = DAG.getFreeze(LHS);
9881 RHS = DAG.getFreeze(RHS);
9882 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9883 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9884 SDValue Shift =
9885 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9886 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9887}
9888
9889 SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9890 SDLoc dl(N);
9891 EVT VT = N->getValueType(0);
9892 SDValue Op = N->getOperand(0);
9893
9894 if (!VT.isSimple())
9895 return SDValue();
9896
9897 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9898 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9899 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9900 default:
9901 return SDValue();
9902 case MVT::i16:
9903 // Use a rotate by 8. This can be further expanded if necessary.
9904 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9905 case MVT::i32:
9906 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9907 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9908 DAG.getConstant(0xFF00, dl, VT));
9909 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9910 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9911 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9912 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9913 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9914 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9915 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9916 case MVT::i64:
9917 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9918 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9919 DAG.getConstant(255ULL<<8, dl, VT));
9920 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9921 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9922 DAG.getConstant(255ULL<<16, dl, VT));
9923 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9924 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9925 DAG.getConstant(255ULL<<24, dl, VT));
9926 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9927 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9928 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9929 DAG.getConstant(255ULL<<24, dl, VT));
9930 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9931 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9932 DAG.getConstant(255ULL<<16, dl, VT));
9933 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9934 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9935 DAG.getConstant(255ULL<<8, dl, VT));
9936 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9937 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9938 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9939 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9940 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9941 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9942 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9943 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9944 }
9945}
9946
9947 SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9948 SDLoc dl(N);
9949 EVT VT = N->getValueType(0);
9950 SDValue Op = N->getOperand(0);
9951 SDValue Mask = N->getOperand(1);
9952 SDValue EVL = N->getOperand(2);
9953
9954 if (!VT.isSimple())
9955 return SDValue();
9956
9957 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9958 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9959 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9960 default:
9961 return SDValue();
9962 case MVT::i16:
9963 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9964 Mask, EVL);
9965 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9966 Mask, EVL);
9967 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9968 case MVT::i32:
9969 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9970 Mask, EVL);
9971 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9972 Mask, EVL);
9973 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9974 Mask, EVL);
9975 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9976 Mask, EVL);
9977 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9978 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9979 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9980 Mask, EVL);
9981 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9982 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9983 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9984 case MVT::i64:
9985 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9986 Mask, EVL);
9987 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9988 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9989 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9990 Mask, EVL);
9991 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9992 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9993 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9994 Mask, EVL);
9995 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9996 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9997 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9998 Mask, EVL);
9999 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10000 Mask, EVL);
10001 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10002 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10003 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10004 Mask, EVL);
10005 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10006 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10007 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10008 Mask, EVL);
10009 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10010 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10011 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10012 Mask, EVL);
10013 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10014 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10015 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10016 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10017 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10018 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10019 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10020 }
10021}
10022
10023 SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10024 SDLoc dl(N);
10025 EVT VT = N->getValueType(0);
10026 SDValue Op = N->getOperand(0);
10027 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10028 unsigned Sz = VT.getScalarSizeInBits();
10029
10030 SDValue Tmp, Tmp2, Tmp3;
10031
10032 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10033 // and finally the i1 pairs.
10034 // TODO: We can easily support i4/i2 legal types if any target ever does.
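// Illustrative single-byte trace, v = 0b10110010:
//   swap i4 -> 0b00101011
//   swap i2 -> 0b10001110
//   swap i1 -> 0b01001101, the bit-reversed value.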
10035 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10036 // Create the masks - repeating the pattern every byte.
10037 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10038 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10039 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10040
10041 // BSWAP if the type is wider than a single byte.
10042 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10043
10044 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10045 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10046 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10047 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10048 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10049 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10050
10051 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10052 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10053 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10054 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10055 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10056 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10057
10058 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10059 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10060 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10061 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10062 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10063 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10064 return Tmp;
10065 }
10066
10067 Tmp = DAG.getConstant(0, dl, VT);
10068 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10069 if (I < J)
10070 Tmp2 =
10071 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10072 else
10073 Tmp2 =
10074 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10075
10076 APInt Shift = APInt::getOneBitSet(Sz, J);
10077 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10078 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10079 }
10080
10081 return Tmp;
10082}
10083
10084 SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10085 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10086
10087 SDLoc dl(N);
10088 EVT VT = N->getValueType(0);
10089 SDValue Op = N->getOperand(0);
10090 SDValue Mask = N->getOperand(1);
10091 SDValue EVL = N->getOperand(2);
10092 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10093 unsigned Sz = VT.getScalarSizeInBits();
10094
10095 SDValue Tmp, Tmp2, Tmp3;
10096
10097 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10098 // and finally the i1 pairs.
10099 // TODO: We can easily support i4/i2 legal types if any target ever does.
10100 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10101 // Create the masks - repeating the pattern every byte.
10102 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10103 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10104 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10105
10106 // BSWAP if the type is wider than a single byte.
10107 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10108
10109 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10110 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10111 Mask, EVL);
10112 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10113 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10114 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10115 Mask, EVL);
10116 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10117 Mask, EVL);
10118 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10119
10120 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10121 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10122 Mask, EVL);
10123 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10124 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10125 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10126 Mask, EVL);
10127 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10128 Mask, EVL);
10129 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10130
10131 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10132 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10133 Mask, EVL);
10134 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10135 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10136 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10137 Mask, EVL);
10138 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10139 Mask, EVL);
10140 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10141 return Tmp;
10142 }
10143 return SDValue();
10144}
10145
10146std::pair<SDValue, SDValue>
10147 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10148 SelectionDAG &DAG) const {
10149 SDLoc SL(LD);
10150 SDValue Chain = LD->getChain();
10151 SDValue BasePTR = LD->getBasePtr();
10152 EVT SrcVT = LD->getMemoryVT();
10153 EVT DstVT = LD->getValueType(0);
10154 ISD::LoadExtType ExtType = LD->getExtensionType();
10155
10156 if (SrcVT.isScalableVector())
10157 report_fatal_error("Cannot scalarize scalable vector loads");
10158
10159 unsigned NumElem = SrcVT.getVectorNumElements();
10160
10161 EVT SrcEltVT = SrcVT.getScalarType();
10162 EVT DstEltVT = DstVT.getScalarType();
10163
10164 // A vector must always be stored in memory as-is, i.e. without any padding
10165 // between the elements, since various code depends on it, e.g. in the
10166 // handling of a bitcast of a vector type to int, which may be done with a
10167 // vector store followed by an integer load. A vector that does not have
10168 // elements that are byte-sized must therefore be stored as an integer
10169 // built out of the extracted vector elements.
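// E.g. a v4i6 vector is loaded below as one i24 integer, and element Idx is
// recovered as trunc((Load >> (6 * Idx)) & 0x3F) on little-endian targets
// (the shift index is mirrored on big-endian targets).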
10170 if (!SrcEltVT.isByteSized()) {
10171 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10172 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10173
10174 unsigned NumSrcBits = SrcVT.getSizeInBits();
10175 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10176
10177 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10178 SDValue SrcEltBitMask = DAG.getConstant(
10179 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10180
10181 // Load the whole vector and avoid masking off the top bits as it makes
10182 // the codegen worse.
10183 SDValue Load =
10184 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10185 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10186 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10187
10189 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10190 unsigned ShiftIntoIdx =
10191 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10192 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10193 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10194 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10195 SDValue Elt =
10196 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10197 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10198
10199 if (ExtType != ISD::NON_EXTLOAD) {
10200 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10201 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10202 }
10203
10204 Vals.push_back(Scalar);
10205 }
10206
10207 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10208 return std::make_pair(Value, Load.getValue(1));
10209 }
10210
10211 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10212 assert(SrcEltVT.isByteSized());
10213
10214 SmallVector<SDValue, 8> Vals;
10215 SmallVector<SDValue, 8> LoadChains;
10216
10217 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10218 SDValue ScalarLoad = DAG.getExtLoad(
10219 ExtType, SL, DstEltVT, Chain, BasePTR,
10220 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10221 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10222
10223 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10224
10225 Vals.push_back(ScalarLoad.getValue(0));
10226 LoadChains.push_back(ScalarLoad.getValue(1));
10227 }
10228
10229 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10230 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10231
10232 return std::make_pair(Value, NewChain);
10233}
10234
10235 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10236 SelectionDAG &DAG) const {
10237 SDLoc SL(ST);
10238
10239 SDValue Chain = ST->getChain();
10240 SDValue BasePtr = ST->getBasePtr();
10241 SDValue Value = ST->getValue();
10242 EVT StVT = ST->getMemoryVT();
10243
10244 if (StVT.isScalableVector())
10245 report_fatal_error("Cannot scalarize scalable vector stores");
10246
10247 // The type of the data we want to save
10248 EVT RegVT = Value.getValueType();
10249 EVT RegSclVT = RegVT.getScalarType();
10250
10251 // The type of data as saved in memory.
10252 EVT MemSclVT = StVT.getScalarType();
10253
10254 unsigned NumElem = StVT.getVectorNumElements();
10255
10256 // A vector must always be stored in memory as-is, i.e. without any padding
10257 // between the elements, since various code depends on it, e.g. in the
10258 // handling of a bitcast of a vector type to int, which may be done with a
10259 // vector store followed by an integer load. A vector that does not have
10260 // elements that are byte-sized must therefore be stored as an integer
10261 // built out of the extracted vector elements.
10262 if (!MemSclVT.isByteSized()) {
10263 unsigned NumBits = StVT.getSizeInBits();
10264 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10265
10266 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10267
10268 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10269 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10270 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10271 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10272 unsigned ShiftIntoIdx =
10273 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10274 SDValue ShiftAmount =
10275 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10276 SDValue ShiftedElt =
10277 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10278 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10279 }
10280
10281 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10282 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10283 ST->getAAInfo());
10284 }
10285
10286 // Store Stride in bytes
10287 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10288 assert(Stride && "Zero stride!");
10289 // Extract each of the elements from the original vector and save them into
10290 // memory individually.
10291 SmallVector<SDValue, 8> Stores;
10292 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10293 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10294
10295 SDValue Ptr =
10296 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10297
10298 // This scalar TruncStore may be illegal, but we legalize it later.
10299 SDValue Store = DAG.getTruncStore(
10300 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10301 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10302 ST->getAAInfo());
10303
10304 Stores.push_back(Store);
10305 }
10306
10307 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10308}
10309
10310std::pair<SDValue, SDValue>
10311 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10312 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10313 "unaligned indexed loads not implemented!");
10314 SDValue Chain = LD->getChain();
10315 SDValue Ptr = LD->getBasePtr();
10316 EVT VT = LD->getValueType(0);
10317 EVT LoadedVT = LD->getMemoryVT();
10318 SDLoc dl(LD);
10319 auto &MF = DAG.getMachineFunction();
10320
10321 if (VT.isFloatingPoint() || VT.isVector()) {
10322 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10323 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10324 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10325 LoadedVT.isVector()) {
10326 // Scalarize the load and let the individual components be handled.
10327 return scalarizeVectorLoad(LD, DAG);
10328 }
10329
10330 // Expand to a (misaligned) integer load of the same size,
10331 // then bitconvert to floating point or vector.
10332 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10333 LD->getMemOperand());
10334 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10335 if (LoadedVT != VT)
10336 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10337 ISD::ANY_EXTEND, dl, VT, Result);
10338
10339 return std::make_pair(Result, newLoad.getValue(1));
10340 }
10341
10342 // Copy the value to an (aligned) stack slot using (unaligned) integer
10343 // loads and stores, then do a (aligned) load from the stack slot.
10344 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10345 unsigned LoadedBytes = LoadedVT.getStoreSize();
10346 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10347 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10348
10349 // Make sure the stack slot is also aligned for the register type.
10350 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10351 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10352 SmallVector<SDValue, 8> Stores;
10353 SDValue StackPtr = StackBase;
10354 unsigned Offset = 0;
10355
10356 EVT PtrVT = Ptr.getValueType();
10357 EVT StackPtrVT = StackPtr.getValueType();
10358
10359 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10360 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10361
10362 // Do all but one copies using the full register width.
10363 for (unsigned i = 1; i < NumRegs; i++) {
10364 // Load one integer register's worth from the original location.
10365 SDValue Load = DAG.getLoad(
10366 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10367 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10368 // Follow the load with a store to the stack slot. Remember the store.
10369 Stores.push_back(DAG.getStore(
10370 Load.getValue(1), dl, Load, StackPtr,
10371 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10372 // Increment the pointers.
10373 Offset += RegBytes;
10374
10375 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10376 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10377 }
10378
10379 // The last copy may be partial. Do an extending load.
10380 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10381 8 * (LoadedBytes - Offset));
10382 SDValue Load = DAG.getExtLoad(
10383 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10384 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10385 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10386 // Follow the load with a store to the stack slot. Remember the store.
10387 // On big-endian machines this requires a truncating store to ensure
10388 // that the bits end up in the right place.
10389 Stores.push_back(DAG.getTruncStore(
10390 Load.getValue(1), dl, Load, StackPtr,
10391 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10392
10393 // The order of the stores doesn't matter - say it with a TokenFactor.
10394 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10395
10396 // Finally, perform the original load only redirected to the stack slot.
10397 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10398 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10399 LoadedVT);
10400
10401 // Callers expect a MERGE_VALUES node.
10402 return std::make_pair(Load, TF);
10403 }
10404
10405 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10406 "Unaligned load of unsupported type.");
10407
10408 // Compute the new VT that is half the size of the old one. This is an
10409 // integer MVT.
10410 unsigned NumBits = LoadedVT.getSizeInBits();
10411 EVT NewLoadedVT;
10412 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10413 NumBits >>= 1;
10414
10415 Align Alignment = LD->getBaseAlign();
10416 unsigned IncrementSize = NumBits / 8;
10417 ISD::LoadExtType HiExtType = LD->getExtensionType();
10418
10419 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10420 if (HiExtType == ISD::NON_EXTLOAD)
10421 HiExtType = ISD::ZEXTLOAD;
10422
10423 // Load the value in two parts
10424 SDValue Lo, Hi;
10425 if (DAG.getDataLayout().isLittleEndian()) {
10426 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10427 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10428 LD->getAAInfo());
10429
10430 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10431 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10432 LD->getPointerInfo().getWithOffset(IncrementSize),
10433 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10434 LD->getAAInfo());
10435 } else {
10436 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10437 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10438 LD->getAAInfo());
10439
10440 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10441 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10442 LD->getPointerInfo().getWithOffset(IncrementSize),
10443 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10444 LD->getAAInfo());
10445 }
10446
10447 // aggregate the two parts
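// E.g. an unaligned i32 load on a little-endian target becomes two i16
// loads (Lo zero-extended), combined below as (Hi << 16) | Lo.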
10448 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10449 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10450 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10451
10452 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10453 Hi.getValue(1));
10454
10455 return std::make_pair(Result, TF);
10456}
10457
10458 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10459 SelectionDAG &DAG) const {
10460 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10461 "unaligned indexed stores not implemented!");
10462 SDValue Chain = ST->getChain();
10463 SDValue Ptr = ST->getBasePtr();
10464 SDValue Val = ST->getValue();
10465 EVT VT = Val.getValueType();
10466 Align Alignment = ST->getBaseAlign();
10467 auto &MF = DAG.getMachineFunction();
10468 EVT StoreMemVT = ST->getMemoryVT();
10469
10470 SDLoc dl(ST);
10471 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10472 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10473 if (isTypeLegal(intVT)) {
10474 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10475 StoreMemVT.isVector()) {
10476 // Scalarize the store and let the individual components be handled.
10477 SDValue Result = scalarizeVectorStore(ST, DAG);
10478 return Result;
10479 }
10480 // Expand to a bitconvert of the value to the integer type of the
10481 // same size, then a (misaligned) int store.
10482 // FIXME: Does not handle truncating floating point stores!
10483 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10484 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10485 Alignment, ST->getMemOperand()->getFlags());
10486 return Result;
10487 }
10488 // Do an (aligned) store to a stack slot, then copy from the stack slot
10489 // to the final destination using (unaligned) integer loads and stores.
10490 MVT RegVT = getRegisterType(
10491 *DAG.getContext(),
10492 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10493 EVT PtrVT = Ptr.getValueType();
10494 unsigned StoredBytes = StoreMemVT.getStoreSize();
10495 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10496 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10497
10498 // Make sure the stack slot is also aligned for the register type.
10499 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10500 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10501
10502 // Perform the original store, only redirected to the stack slot.
10503 SDValue Store = DAG.getTruncStore(
10504 Chain, dl, Val, StackPtr,
10505 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10506
10507 EVT StackPtrVT = StackPtr.getValueType();
10508
10509 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10510 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10511 SmallVector<SDValue, 8> Stores;
10512 unsigned Offset = 0;
10513
10514 // Do all but one copies using the full register width.
10515 for (unsigned i = 1; i < NumRegs; i++) {
10516 // Load one integer register's worth from the stack slot.
10517 SDValue Load = DAG.getLoad(
10518 RegVT, dl, Store, StackPtr,
10519 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10520 // Store it to the final location. Remember the store.
10521 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10522 ST->getPointerInfo().getWithOffset(Offset),
10523 ST->getBaseAlign(),
10524 ST->getMemOperand()->getFlags()));
10525 // Increment the pointers.
10526 Offset += RegBytes;
10527 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10528 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10529 }
10530
10531 // The last store may be partial. Do a truncating store. On big-endian
10532 // machines this requires an extending load from the stack slot to ensure
10533 // that the bits are in the right place.
10534 EVT LoadMemVT =
10535 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10536
10537 // Load from the stack slot.
10538 SDValue Load = DAG.getExtLoad(
10539 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10540 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10541
10542 Stores.push_back(DAG.getTruncStore(
10543 Load.getValue(1), dl, Load, Ptr,
10544 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10545 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10546 // The order of the stores doesn't matter - say it with a TokenFactor.
10547 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10548 return Result;
10549 }
10550
10551 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10552 "Unaligned store of unknown type.");
10553 // Get the half-size VT
10554 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10555 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10556 unsigned IncrementSize = NumBits / 8;
10557
10558 // Divide the stored value in two parts.
10559 SDValue ShiftAmount =
10560 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10561 SDValue Lo = Val;
10562 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10563 // fold and not use the upper bits. A smaller constant may be easier to
10564 // materialize.
10565 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10566 Lo = DAG.getNode(
10567 ISD::AND, dl, VT, Lo,
10568 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10569 VT));
10570 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10571
10572 // Store the two parts
10573 SDValue Store1, Store2;
10574 Store1 = DAG.getTruncStore(Chain, dl,
10575 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10576 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10577 ST->getMemOperand()->getFlags());
10578
10579 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10580 Store2 = DAG.getTruncStore(
10581 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10582 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10583 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10584
10585 SDValue Result =
10586 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10587 return Result;
10588}
10589
10590SDValue
10591 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10592 const SDLoc &DL, EVT DataVT,
10593 SelectionDAG &DAG,
10594 bool IsCompressedMemory) const {
10595 SDValue Increment;
10596 EVT AddrVT = Addr.getValueType();
10597 EVT MaskVT = Mask.getValueType();
10598 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10599 "Incompatible types of Data and Mask");
10600 if (IsCompressedMemory) {
10601 if (DataVT.isScalableVector())
10603 "Cannot currently handle compressed memory with scalable vectors");
10604 // Incrementing the pointer according to number of '1's in the mask.
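// E.g. compressing v4i32 through mask 0b1011 stores three elements, so the
// address must advance by popcount(mask) * 4 == 12 bytes.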
10605 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10606 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10607 if (MaskIntVT.getSizeInBits() < 32) {
10608 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10609 MaskIntVT = MVT::i32;
10610 }
10611
10612 // Count '1's with POPCNT.
10613 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10614 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10615 // Scale is an element size in bytes.
10616 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10617 AddrVT);
10618 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10619 } else if (DataVT.isScalableVector()) {
10620 Increment = DAG.getVScale(DL, AddrVT,
10621 APInt(AddrVT.getFixedSizeInBits(),
10622 DataVT.getStoreSize().getKnownMinValue()));
10623 } else
10624 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10625
10626 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10627}
10628
10629 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10630 EVT VecVT, const SDLoc &dl,
10631 ElementCount SubEC) {
10632 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10633 "Cannot index a scalable vector within a fixed-width vector");
10634
10635 unsigned NElts = VecVT.getVectorMinNumElements();
10636 unsigned NumSubElts = SubEC.getKnownMinValue();
10637 EVT IdxVT = Idx.getValueType();
10638
10639 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10640 // If this is a constant index and we know the value plus the number of the
10641 // elements in the subvector minus one is less than the minimum number of
10642 // elements then it's safe to return Idx.
10643 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10644 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10645 return Idx;
10646 SDValue VS =
10647 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10648 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10649 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10650 DAG.getConstant(NumSubElts, dl, IdxVT));
10651 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10652 }
10653 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10654 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10655 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10656 DAG.getConstant(Imm, dl, IdxVT));
10657 }
10658 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10659 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10660 DAG.getConstant(MaxIndex, dl, IdxVT));
10661}
10662
10663 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10664 SDValue VecPtr, EVT VecVT,
10665 SDValue Index) const {
10666 return getVectorSubVecPointer(
10667 DAG, VecPtr, VecVT,
10668 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10669 Index);
10670}
10671
10672 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10673 SDValue VecPtr, EVT VecVT,
10674 EVT SubVecVT,
10675 SDValue Index) const {
10676 SDLoc dl(Index);
10677 // Make sure the index type is big enough to compute in.
10678 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10679
10680 EVT EltVT = VecVT.getVectorElementType();
10681
10682 // Calculate the element offset and add it to the pointer.
10683 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10684 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10685 "Converting bits to bytes lost precision");
10686 assert(SubVecVT.getVectorElementType() == EltVT &&
10687 "Sub-vector must be a vector with matching element type");
10688 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10689 SubVecVT.getVectorElementCount());
10690
10691 EVT IdxVT = Index.getValueType();
10692 if (SubVecVT.isScalableVector())
10693 Index =
10694 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10695 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10696
10697 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10698 DAG.getConstant(EltSize, dl, IdxVT));
10699 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10700}
10701
10702//===----------------------------------------------------------------------===//
10703// Implementation of Emulated TLS Model
10704//===----------------------------------------------------------------------===//
10705
10706 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10707 SelectionDAG &DAG) const {
10708 // Access to address of TLS variable xyz is lowered to a function call:
10709 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10710 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10711 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10712 SDLoc dl(GA);
10713
10714 ArgListTy Args;
10715 const GlobalValue *GV =
10716 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10717 SmallString<32> NameString("__emutls_v.");
10718 NameString += GV->getName();
10719 StringRef EmuTlsVarName(NameString);
10720 const GlobalVariable *EmuTlsVar =
10721 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10722 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10723 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10724
10725 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10726
10727 TargetLowering::CallLoweringInfo CLI(DAG);
10728 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10729 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10730 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10731
10732 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10733 // At least for X86 targets, maybe good for other targets too?
10734 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10735 MFI.setAdjustsStack(true); // Is this only for X86 target?
10736 MFI.setHasCalls(true);
10737
10738 assert((GA->getOffset() == 0) &&
10739 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10740 return CallResult.first;
10741}
10742
10743 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10744 SelectionDAG &DAG) const {
10745 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10746 if (!isCtlzFast())
10747 return SDValue();
10748 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10749 SDLoc dl(Op);
10750 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10751 EVT VT = Op.getOperand(0).getValueType();
10752 SDValue Zext = Op.getOperand(0);
10753 if (VT.bitsLT(MVT::i32)) {
10754 VT = MVT::i32;
10755 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10756 }
10757 unsigned Log2b = Log2_32(VT.getSizeInBits());
10758 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10759 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10760 DAG.getConstant(Log2b, dl, MVT::i32));
10761 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10762 }
10763 return SDValue();
10764}
10765
10766SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10767 SDValue Op0 = Node->getOperand(0);
10768 SDValue Op1 = Node->getOperand(1);
10769 EVT VT = Op0.getValueType();
10770 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10771 unsigned Opcode = Node->getOpcode();
10772 SDLoc DL(Node);
10773
10774 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10775 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10776 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10777 Op0 = DAG.getFreeze(Op0);
10778 SDValue Zero = DAG.getConstant(0, DL, VT);
10779 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10780 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10781 }
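// Why this works: if x == 0 the compare yields all-ones (-1) and
// x - (-1) == 1; otherwise the compare yields 0 and x is returned as-is.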
10782
10783 // umin(x,y) -> sub(x,usubsat(x,y))
10784 // TODO: Missing freeze(Op0)?
10785 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10786 isOperationLegal(ISD::USUBSAT, VT)) {
10787 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10788 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10789 }
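// E.g. 8-bit umin(200, 100): usubsat(200, 100) == 100 and 200 - 100 == 100;
// when x <= y, usubsat(x, y) == 0 and the subtraction returns x unchanged.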
10790
10791 // umax(x,y) -> add(x,usubsat(y,x))
10792 // TODO: Missing freeze(Op0)?
10793 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10794 isOperationLegal(ISD::USUBSAT, VT)) {
10795 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10796 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10797 }
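// E.g. 8-bit umax(100, 200): usubsat(200, 100) == 100 and 100 + 100 == 200;
// when y <= x, usubsat(y, x) == 0 and the addition returns x unchanged.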
10798
10799 // FIXME: Should really try to split the vector in case it's legal on a
10800 // subvector.
10801 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10802 return DAG.UnrollVectorOp(Node);
10803
10804 // Attempt to find an existing SETCC node that we can reuse.
10805 // TODO: Do we need a generic doesSETCCNodeExist?
10806 // TODO: Missing freeze(Op0)/freeze(Op1)?
10807 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10808 ISD::CondCode PrefCommuteCC,
10809 ISD::CondCode AltCommuteCC) {
10810 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10811 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10812 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10813 {Op0, Op1, DAG.getCondCode(CC)})) {
10814 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10815 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10816 }
10817 }
10818 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10819 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10820 {Op0, Op1, DAG.getCondCode(CC)})) {
10821 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10822 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10823 }
10824 }
10825 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10826 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10827 };
10828
10829 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10830 // -> Y = (A < B) ? B : A
10831 // -> Y = (A >= B) ? A : B
10832 // -> Y = (A <= B) ? B : A
10833 switch (Opcode) {
10834 case ISD::SMAX:
10835 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10836 case ISD::SMIN:
10837 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10838 case ISD::UMAX:
10839 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10840 case ISD::UMIN:
10841 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10842 }
10843
10844 llvm_unreachable("How did we get here?");
10845}
10846
10847SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10848 unsigned Opcode = Node->getOpcode();
10849 SDValue LHS = Node->getOperand(0);
10850 SDValue RHS = Node->getOperand(1);
10851 EVT VT = LHS.getValueType();
10852 SDLoc dl(Node);
10853
10854 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10855 assert(VT.isInteger() && "Expected operands to be integers");
10856
10857 // usub.sat(a, b) -> umax(a, b) - b
10858 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10859 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10860 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10861 }
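// E.g. 8-bit usub.sat(100, 200): umax(100, 200) == 200 and 200 - 200 == 0;
// when a >= b the umax is a no-op and the plain difference is returned.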
10862
10863 // uadd.sat(a, b) -> umin(a, ~b) + b
10864 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10865 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10866 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10867 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10868 }
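// E.g. 8-bit uadd.sat(200, 100): ~100 == 155, umin(200, 155) == 155 and
// 155 + 100 == 255 (saturated); when a + b cannot overflow, a <= ~b holds,
// so the umin leaves a unchanged and the plain sum is returned.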
10869
10870 unsigned OverflowOp;
10871 switch (Opcode) {
10872 case ISD::SADDSAT:
10873 OverflowOp = ISD::SADDO;
10874 break;
10875 case ISD::UADDSAT:
10876 OverflowOp = ISD::UADDO;
10877 break;
10878 case ISD::SSUBSAT:
10879 OverflowOp = ISD::SSUBO;
10880 break;
10881 case ISD::USUBSAT:
10882 OverflowOp = ISD::USUBO;
10883 break;
10884 default:
10885 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10886 "addition or subtraction node.");
10887 }
10888
10889 // FIXME: Should really try to split the vector in case it's legal on a
10890 // subvector.
10891 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10892 return DAG.UnrollVectorOp(Node);
10893
10894 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10895 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10896 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10897 SDValue SumDiff = Result.getValue(0);
10898 SDValue Overflow = Result.getValue(1);
10899 SDValue Zero = DAG.getConstant(0, dl, VT);
10900 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10901
10902 if (Opcode == ISD::UADDSAT) {
10903 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10904 // (LHS + RHS) | OverflowMask
10905 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10906 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10907 }
10908 // Overflow ? 0xffff.... : (LHS + RHS)
10909 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10910 }
10911
10912 if (Opcode == ISD::USUBSAT) {
10913 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10914 // (LHS - RHS) & ~OverflowMask
10915 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10916 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10917 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10918 }
10919 // Overflow ? 0 : (LHS - RHS)
10920 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10921 }
10922
10923 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10924 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10925 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10926
10927 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10928 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10929
10930 // If either of the operand signs are known, then they are guaranteed to
10931 // only saturate in one direction. If non-negative they will saturate
10932 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10933 //
10934 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10935 // sign of 'y' has to be flipped.
10936
10937 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10938 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10939 : KnownRHS.isNegative();
10940 if (LHSIsNonNegative || RHSIsNonNegative) {
10941 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10942 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10943 }
10944
10945 bool LHSIsNegative = KnownLHS.isNegative();
10946 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10947 : KnownRHS.isNonNegative();
10948 if (LHSIsNegative || RHSIsNegative) {
10949 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10950 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10951 }
10952 }
10953
10954 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10955 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10956 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10957 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10958 DAG.getConstant(BitWidth - 1, dl, VT));
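// Shift is SumDiff sign-extended across all bits: positive overflow wraps
// SumDiff negative, so all-ones ^ SatMin == SatMax; negative overflow
// wraps it positive, so zero ^ SatMin == SatMin.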
10959 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10960 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10961}
10962
10963SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10964 unsigned Opcode = Node->getOpcode();
10965 SDValue LHS = Node->getOperand(0);
10966 SDValue RHS = Node->getOperand(1);
10967 EVT VT = LHS.getValueType();
10968 EVT ResVT = Node->getValueType(0);
10969 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10970 SDLoc dl(Node);
10971
10972 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10973 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10974 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10975 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10976
10977 // We can't perform arithmetic on i1 values. Extending them would
10978 // probably result in worse codegen, so let's just use two selects instead.
10979 // Some targets are also just better off using selects rather than subtraction
10980 // because one of the conditions can be merged with one of the selects.
10981 // And finally, if we don't know the contents of high bits of a boolean value
10982 // we can't perform any arithmetic either.
10983 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10984 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10985 SDValue SelectZeroOrOne =
10986 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10987 DAG.getConstant(0, dl, ResVT));
10988 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10989 SelectZeroOrOne);
10990 }
10991
10992 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10993 std::swap(IsGT, IsLT);
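// With 0/1 booleans, IsGT - IsLT directly encodes +1 / 0 / -1. With
// zero-or-all-ones booleans the swap above turns this into IsLT - IsGT,
// which restores the same +1 / 0 / -1 encoding.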
10994 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10995 ResVT);
10996}
10997
10998SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10999 unsigned Opcode = Node->getOpcode();
11000 bool IsSigned = Opcode == ISD::SSHLSAT;
11001 SDValue LHS = Node->getOperand(0);
11002 SDValue RHS = Node->getOperand(1);
11003 EVT VT = LHS.getValueType();
11004 SDLoc dl(Node);
11005
11006 assert((Node->getOpcode() == ISD::SSHLSAT ||
11007 Node->getOpcode() == ISD::USHLSAT) &&
11008 "Expected a SHLSAT opcode");
11009 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11010 assert(VT.isInteger() && "Expected operands to be integers");
11011
11012 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11013 return DAG.UnrollVectorOp(Node);
11014
11015 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11016
11017 unsigned BW = VT.getScalarSizeInBits();
11018 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11019 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11020 SDValue Orig =
11021 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11022
11023 SDValue SatVal;
11024 if (IsSigned) {
11025 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11026 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11027 SDValue Cond =
11028 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11029 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11030 } else {
11031 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11032 }
11033 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11034 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11035}
11036
11037void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11038 bool Signed, SDValue &Lo, SDValue &Hi,
11039 SDValue LHS, SDValue RHS,
11040 SDValue HiLHS, SDValue HiRHS) const {
11041 EVT VT = LHS.getValueType();
11042 assert(RHS.getValueType() == VT && "Mismatching operand types");
11043
11044 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11045 assert((!Signed || !HiLHS) &&
11046 "Signed flag should only be set when HiLHS and RiRHS are null");
11047
11048 // We'll expand the multiplication by brute force because we have no other
11049 // options. This is a trivially-generalized version of the code from
11050 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11051 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11052 // sign bits while calculating the Hi half.
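// Writing L = LH*2^h + LL and R = RH*2^h + RL with h = HalfBits gives
//   L*R = (LH*RH)*2^(2h) + (LH*RL + LL*RH)*2^h + LL*RL
// The partial sums U and V below accumulate these terms in an order that
// keeps every intermediate value within the Bits-wide type.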
11053 unsigned Bits = VT.getSizeInBits();
11054 unsigned HalfBits = Bits / 2;
11055 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11056 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11057 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11058
11059 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11060 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11061
11062 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11063 // This is always an unsigned shift.
11064 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11065
11066 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11067 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11068 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11069
11070 SDValue U =
11071 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11072 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11073 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11074
11075 SDValue V =
11076 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11077 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11078
11079 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11080 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11081
11082 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11083 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11084
11085 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11086 // the products to Hi.
11087 if (HiLHS) {
11088 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11089 DAG.getNode(ISD::ADD, dl, VT,
11090 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11091 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11092 }
11093}
11094
11095void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11096 bool Signed, const SDValue LHS,
11097 const SDValue RHS, SDValue &Lo,
11098 SDValue &Hi) const {
11099 EVT VT = LHS.getValueType();
11100 assert(RHS.getValueType() == VT && "Mismatching operand types");
11101 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11102 // We can fall back to a libcall with an illegal type for the MUL if we
11103 // have a libcall big enough.
11104 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11105 if (WideVT == MVT::i16)
11106 LC = RTLIB::MUL_I16;
11107 else if (WideVT == MVT::i32)
11108 LC = RTLIB::MUL_I32;
11109 else if (WideVT == MVT::i64)
11110 LC = RTLIB::MUL_I64;
11111 else if (WideVT == MVT::i128)
11112 LC = RTLIB::MUL_I128;
11113
11114 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
11115 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11116 return;
11117 }
11118
11119 SDValue HiLHS, HiRHS;
11120 if (Signed) {
11121 // The high part is obtained by SRA'ing all but one of the bits of the
11122 // low part.
11123 unsigned LoSize = VT.getFixedSizeInBits();
11124 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11125 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11126 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11127 } else {
11128 HiLHS = DAG.getConstant(0, dl, VT);
11129 HiRHS = DAG.getConstant(0, dl, VT);
11130 }
11131
11132 // Attempt a libcall.
11133 SDValue Ret;
11134 TargetLowering::MakeLibCallOptions CallOptions;
11135 CallOptions.setIsSigned(Signed);
11136 CallOptions.setIsPostTypeLegalization(true);
11137 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11138 // Halves of WideVT are packed into registers in different order
11139 // depending on platform endianness. This is usually handled by
11140 // the C calling convention, but we can't defer to it in
11141 // the legalizer.
11142 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11143 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11144 } else {
11145 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11146 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11147 }
11148 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11149 "Ret value is a collection of constituent nodes holding result.");
11150 if (DAG.getDataLayout().isLittleEndian()) {
11151 // Same as above.
11152 Lo = Ret.getOperand(0);
11153 Hi = Ret.getOperand(1);
11154 } else {
11155 Lo = Ret.getOperand(1);
11156 Hi = Ret.getOperand(0);
11157 }
11158}
11159
11160SDValue
11161TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11162 assert((Node->getOpcode() == ISD::SMULFIX ||
11163 Node->getOpcode() == ISD::UMULFIX ||
11164 Node->getOpcode() == ISD::SMULFIXSAT ||
11165 Node->getOpcode() == ISD::UMULFIXSAT) &&
11166 "Expected a fixed point multiplication opcode");
11167
11168 SDLoc dl(Node);
11169 SDValue LHS = Node->getOperand(0);
11170 SDValue RHS = Node->getOperand(1);
11171 EVT VT = LHS.getValueType();
11172 unsigned Scale = Node->getConstantOperandVal(2);
11173 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11174 Node->getOpcode() == ISD::UMULFIXSAT);
11175 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11176 Node->getOpcode() == ISD::SMULFIXSAT);
11177 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11178 unsigned VTSize = VT.getScalarSizeInBits();
11179
11180 if (!Scale) {
11181 // [us]mul.fix(a, b, 0) -> mul(a, b)
11182 if (!Saturating) {
11183 if (isOperationLegalOrCustom(ISD::MUL, VT))
11184 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11185 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11186 SDValue Result =
11187 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11188 SDValue Product = Result.getValue(0);
11189 SDValue Overflow = Result.getValue(1);
11190 SDValue Zero = DAG.getConstant(0, dl, VT);
11191
11192 APInt MinVal = APInt::getSignedMinValue(VTSize);
11193 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11194 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11195 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11196 // Xor the inputs, if resulting sign bit is 0 the product will be
11197 // positive, else negative.
11198 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11199 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11200 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11201 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11202 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11203 SDValue Result =
11204 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11205 SDValue Product = Result.getValue(0);
11206 SDValue Overflow = Result.getValue(1);
11207
11208 APInt MaxVal = APInt::getMaxValue(VTSize);
11209 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11210 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11211 }
11212 }
11213
11214 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11215 "Expected scale to be less than the number of bits if signed or at "
11216 "most the number of bits if unsigned.");
11217 assert(LHS.getValueType() == RHS.getValueType() &&
11218 "Expected both operands to be the same type");
11219
11220 // Get the upper and lower bits of the result.
11221 SDValue Lo, Hi;
11222 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11223 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11224 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11225 if (VT.isVector())
11226 WideVT =
11227 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11228 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11229 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11230 Lo = Result.getValue(0);
11231 Hi = Result.getValue(1);
11232 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11233 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11234 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11235 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11236 // Try for a multiplication using a wider type.
11237 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11238 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11239 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11240 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11241 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11242 SDValue Shifted =
11243 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11244 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11245 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11246 } else if (VT.isVector()) {
11247 return SDValue();
11248 } else {
11249 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11250 }
11251
11252 if (Scale == VTSize)
11253 // Result is just the top half since we'd be shifting by the width of the
11254 // operand. Overflow is impossible, so this works for both UMULFIX and
11255 // UMULFIXSAT.
11256 return Hi;
11257
11258 // The result will need to be shifted right by the scale since both operands
11259 // are scaled. The result is given to us in 2 halves, so we only want part of
11260 // both in the result.
11261 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11262 DAG.getShiftAmountConstant(Scale, VT, dl));
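// E.g. with VTSize == 8 and Scale == 4, FSHR(Hi, Lo, 4) yields bits [11:4]
// of the 16-bit product Hi:Lo, i.e. the product rescaled back to a
// fixed point value with 4 fractional bits.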
11263 if (!Saturating)
11264 return Result;
11265
11266 if (!Signed) {
11267 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11268 // widened multiplication) aren't all zeroes.
11269
11270 // Saturate to max if ((Hi >> Scale) != 0),
11271 // which is the same as if (Hi > ((1 << Scale) - 1))
11272 APInt MaxVal = APInt::getMaxValue(VTSize);
11273 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11274 dl, VT);
11275 Result = DAG.getSelectCC(dl, Hi, LowMask,
11276 DAG.getConstant(MaxVal, dl, VT), Result,
11277 ISD::SETUGT);
11278
11279 return Result;
11280 }
11281
11282 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11283 // widened multiplication) aren't all ones or all zeroes.
11284
11285 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11286 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11287
11288 if (Scale == 0) {
11289 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11290 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11291 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11292 // Saturate to SatMin if the wide product is negative, and to SatMax if
11293 // the wide product is positive ...
11294 SDValue Zero = DAG.getConstant(0, dl, VT);
11295 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11296 ISD::SETLT);
11297 // ... but only if we overflowed.
11298 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11299 }
11300
11301 // We handled Scale == 0 above, so all the bits to examine are in Hi.
11302
11303 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11304 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11305 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11306 dl, VT);
11307 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11308 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11309 // which is the same as if (Hi < (-1 << (Scale - 1)))
11310 SDValue HighMask =
11311 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11312 dl, VT);
11313 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11314 return Result;
11315}
11316
11317SDValue
11318TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11319 SDValue LHS, SDValue RHS,
11320 unsigned Scale, SelectionDAG &DAG) const {
11321 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11322 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11323 "Expected a fixed point division opcode");
11324
11325 EVT VT = LHS.getValueType();
11326 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11327 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11328 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11329
11330 // If there is enough room in the type to upscale the LHS or downscale the
11331 // RHS before the division, we can perform it in this type without having to
11332 // resize. For signed operations, the LHS headroom is the number of
11333 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11334 // The headroom for the RHS is the number of trailing zeroes.
11335 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11336 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11337 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11338
11339 // For signed saturating operations, we need to be able to detect true integer
11340 // division overflow; that is, when you have MIN / -EPS. However, this
11341 // is undefined behavior and if we emit divisions that could take such
11342 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11343 // example).
11344 // Avoid this by requiring an extra bit so that we never get this case.
11345 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11346 // signed saturating division, we need to emit a whopping 32-bit division.
11347 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11348 return SDValue();
11349
11350 unsigned LHSShift = std::min(LHSLead, Scale);
11351 unsigned RHSShift = Scale - LHSShift;
11352
11353 // At this point, we know that if we shift the LHS up by LHSShift and the
11354 // RHS down by RHSShift, we can emit a regular division with a final scaling
11355 // factor of Scale.
11356
11357 if (LHSShift)
11358 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11359 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11360 if (RHSShift)
11361 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11362 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11363
11364 SDValue Quot;
11365 if (Signed) {
11366 // For signed operations, if the resulting quotient is negative and the
11367 // remainder is nonzero, subtract 1 from the quotient to round towards
11368 // negative infinity.
11369 SDValue Rem;
11370 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11371 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11372 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11373 if (isTypeLegal(VT) &&
11374 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11375 Quot = DAG.getNode(ISD::SDIVREM, dl,
11376 DAG.getVTList(VT, VT),
11377 LHS, RHS);
11378 Rem = Quot.getValue(1);
11379 Quot = Quot.getValue(0);
11380 } else {
11381 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11382 LHS, RHS);
11383 Rem = DAG.getNode(ISD::SREM, dl, VT,
11384 LHS, RHS);
11385 }
11386 SDValue Zero = DAG.getConstant(0, dl, VT);
11387 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11388 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11389 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11390 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11391 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11392 DAG.getConstant(1, dl, VT));
11393 Quot = DAG.getSelect(dl, VT,
11394 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11395 Sub1, Quot);
11396 } else
11397 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11398 LHS, RHS);
11399
11400 return Quot;
11401}
11402
11403void TargetLowering::expandUADDSUBO(
11404 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11405 SDLoc dl(Node);
11406 SDValue LHS = Node->getOperand(0);
11407 SDValue RHS = Node->getOperand(1);
11408 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11409
11410 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11411 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11412 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11413 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11414 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11415 { LHS, RHS, CarryIn });
11416 Result = SDValue(NodeCarry.getNode(), 0);
11417 Overflow = SDValue(NodeCarry.getNode(), 1);
11418 return;
11419 }
11420
11421 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11422 LHS.getValueType(), LHS, RHS);
11423
11424 EVT ResultType = Node->getValueType(1);
11425 EVT SetCCType = getSetCCResultType(
11426 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11427 SDValue SetCC;
11428 if (IsAdd && isOneConstant(RHS)) {
11429 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
11430 // the live range of X. We assume comparing with 0 is cheap.
11431 // The general case (X + C) < C is not necessarily beneficial. Although we
11432 // reduce the live range of X, we may introduce the materialization of
11433 // constant C.
11434 SetCC =
11435 DAG.getSetCC(dl, SetCCType, Result,
11436 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11437 } else if (IsAdd && isAllOnesConstant(RHS)) {
11438 // Special case: uaddo X, -1 overflows if X != 0.
11439 SetCC =
11440 DAG.getSetCC(dl, SetCCType, LHS,
11441 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11442 } else {
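// The wrapped result of an unsigned add is strictly smaller than either
// operand exactly when a carry occurred; the wrapped result of a subtract
// is strictly larger than the minuend exactly when a borrow occurred.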
11443 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11444 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11445 }
11446 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11447}
11448
11449void TargetLowering::expandSADDSUBO(
11450 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11451 SDLoc dl(Node);
11452 SDValue LHS = Node->getOperand(0);
11453 SDValue RHS = Node->getOperand(1);
11454 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11455
11456 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11457 LHS.getValueType(), LHS, RHS);
11458
11459 EVT ResultType = Node->getValueType(1);
11460 EVT OType = getSetCCResultType(
11461 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11462
11463 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11464 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11465 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11466 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11467 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11468 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11469 return;
11470 }
11471
11472 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11473
11474 // For an addition, the result should be less than one of the operands (LHS)
11475 // if and only if the other operand (RHS) is negative, otherwise there will
11476 // be overflow.
11477 // For a subtraction, the result should be less than one of the operands
11478 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11479 // otherwise there will be overflow.
11480 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11481 SDValue ConditionRHS =
11482 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11483
11484 Overflow = DAG.getBoolExtOrTrunc(
11485 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11486 ResultType, ResultType);
11487}
11488
11490 SDValue &Overflow, SelectionDAG &DAG) const {
11491 SDLoc dl(Node);
11492 EVT VT = Node->getValueType(0);
11493 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11494 SDValue LHS = Node->getOperand(0);
11495 SDValue RHS = Node->getOperand(1);
11496 bool isSigned = Node->getOpcode() == ISD::SMULO;
11497
11498 // For power-of-two multiplications we can use a simpler shift expansion.
11499 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11500 const APInt &C = RHSC->getAPIntValue();
11501 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11502 if (C.isPowerOf2()) {
11503 // smulo(x, signed_min) is same as umulo(x, signed_min).
11504 bool UseArithShift = isSigned && !C.isMinSignedValue();
11505 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11506 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11507 Overflow = DAG.getSetCC(dl, SetCCVT,
11508 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11509 dl, VT, Result, ShiftAmt),
11510 LHS, ISD::SETNE);
11511 return true;
11512 }
11513 }
11514
11515 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11516 if (VT.isVector())
11517 WideVT =
11518 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11519
11520 SDValue BottomHalf;
11521 SDValue TopHalf;
11522 static const unsigned Ops[2][3] =
11523 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11524 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11525 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11526 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11527 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11528 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11529 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11530 RHS);
11531 TopHalf = BottomHalf.getValue(1);
11532 } else if (isTypeLegal(WideVT)) {
11533 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11534 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11535 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11536 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11537 SDValue ShiftAmt =
11538 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11539 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11540 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11541 } else {
11542 if (VT.isVector())
11543 return false;
11544
11545 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11546 }
11547
11548 Result = BottomHalf;
11549 if (isSigned) {
11550 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11551 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11552 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11553 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11554 } else {
11555 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11556 DAG.getConstant(0, dl, VT), ISD::SETNE);
11557 }
11558
11559 // Truncate the result if SetCC returns a larger type than needed.
11560 EVT RType = Node->getValueType(1);
11561 if (RType.bitsLT(Overflow.getValueType()))
11562 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11563
11564 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11565 "Unexpected result type for S/UMULO legalization");
11566 return true;
11567}
11568
11569SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11570 SDLoc dl(Node);
11571 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11572 SDValue Op = Node->getOperand(0);
11573 EVT VT = Op.getValueType();
11574
11575 // Try to use a shuffle reduction for power of two vectors.
11576 if (VT.isPow2VectorType()) {
11577 while (VT.getVectorNumElements() > 1) {
11578 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11579 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11580 break;
11581
11582 SDValue Lo, Hi;
11583 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11584 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11585 VT = HalfVT;
11586
11587 // Stop if splitting is enough to make the reduction legal.
11588 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11589 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11590 Node->getFlags());
11591 }
11592 }
11593
11594 if (VT.isScalableVector())
11596 "Expanding reductions for scalable vectors is undefined.");
11597
11598 EVT EltVT = VT.getVectorElementType();
11599 unsigned NumElts = VT.getVectorNumElements();
11600
11601 SmallVector<SDValue, 8> Ops;
11602 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11603
11604 SDValue Res = Ops[0];
11605 for (unsigned i = 1; i < NumElts; i++)
11606 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11607
11608 // Result type may be wider than element type.
11609 if (EltVT != Node->getValueType(0))
11610 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11611 return Res;
11612}
11613
11614SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11615 SDLoc dl(Node);
11616 SDValue AccOp = Node->getOperand(0);
11617 SDValue VecOp = Node->getOperand(1);
11618 SDNodeFlags Flags = Node->getFlags();
11619
11620 EVT VT = VecOp.getValueType();
11621 EVT EltVT = VT.getVectorElementType();
11622
11623 if (VT.isScalableVector())
11625 "Expanding reductions for scalable vectors is undefined.");
11626
11627 unsigned NumElts = VT.getVectorNumElements();
11628
11629 SmallVector<SDValue, 8> Ops;
11630 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11631
11632 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11633
11634 SDValue Res = AccOp;
11635 for (unsigned i = 0; i < NumElts; i++)
11636 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11637
11638 return Res;
11639}
11640
11641bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11642 SelectionDAG &DAG) const {
11643 EVT VT = Node->getValueType(0);
11644 SDLoc dl(Node);
11645 bool isSigned = Node->getOpcode() == ISD::SREM;
11646 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11647 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11648 SDValue Dividend = Node->getOperand(0);
11649 SDValue Divisor = Node->getOperand(1);
11650 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11651 SDVTList VTs = DAG.getVTList(VT, VT);
11652 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11653 return true;
11654 }
11655 if (isOperationLegalOrCustom(DivOpc, VT)) {
11656 // X % Y -> X-X/Y*Y
11657 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11658 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11659 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11660 return true;
11661 }
11662 return false;
11663}
11664
11665SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11666 SelectionDAG &DAG) const {
11667 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11668 SDLoc dl(SDValue(Node, 0));
11669 SDValue Src = Node->getOperand(0);
11670
11671 // DstVT is the result type, while SatVT is the size to which we saturate
11672 EVT SrcVT = Src.getValueType();
11673 EVT DstVT = Node->getValueType(0);
11674
11675 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11676 unsigned SatWidth = SatVT.getScalarSizeInBits();
11677 unsigned DstWidth = DstVT.getScalarSizeInBits();
11678 assert(SatWidth <= DstWidth &&
11679 "Expected saturation width no wider than result width");
11680
11681 // Determine minimum and maximum integer values and their corresponding
11682 // floating-point values.
11683 APInt MinInt, MaxInt;
11684 if (IsSigned) {
11685 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11686 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11687 } else {
11688 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11689 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11690 }
11691
11692 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11693 // libcall emission cannot handle this. Large result types will fail.
11694 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11695 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11696 SrcVT = Src.getValueType();
11697 }
11698
11699 const fltSemantics &Sem = SrcVT.getFltSemantics();
11700 APFloat MinFloat(Sem);
11701 APFloat MaxFloat(Sem);
11702
11703 APFloat::opStatus MinStatus =
11704 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11705 APFloat::opStatus MaxStatus =
11706 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11707 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11708 !(MaxStatus & APFloat::opStatus::opInexact);
11709
11710 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11711 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11712
11713 // If the integer bounds are exactly representable as floats and min/max are
11714 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11715 // of comparisons and selects.
11716 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11717 isOperationLegal(ISD::FMAXNUM, SrcVT);
11718 if (AreExactFloatBounds && MinMaxLegal) {
11719 SDValue Clamped = Src;
11720
11721 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11722 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11723 // Clamp by MaxFloat from above. NaN cannot occur.
11724 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11725 // Convert clamped value to integer.
11726 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11727 dl, DstVT, Clamped);
11728
11729 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11730 // which will cast to zero.
11731 if (!IsSigned)
11732 return FpToInt;
11733
11734 // Otherwise, select 0 if Src is NaN.
11735 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11736 EVT SetCCVT =
11737 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11738 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11739 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11740 }
11741
11742 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11743 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11744
11745 // Result of direct conversion. The assumption here is that the operation is
11746 // non-trapping and it's fine to apply it to an out-of-range value if we
11747 // select it away later.
11748 SDValue FpToInt =
11749 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11750
11751 SDValue Select = FpToInt;
11752
11753 EVT SetCCVT =
11754 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11755
11756 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11757 // MinInt if Src is NaN.
11758 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11759 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11760 // If Src OGT MaxFloat, select MaxInt.
11761 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11762 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11763
11764 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11765 // is already zero.
11766 if (!IsSigned)
11767 return Select;
11768
11769 // Otherwise, select 0 if Src is NaN.
11770 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11771 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11772 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11773}
11774
11775SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11776 const SDLoc &dl,
11777 SelectionDAG &DAG) const {
11778 EVT OperandVT = Op.getValueType();
11779 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11780 return Op;
11781 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11782 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11783 // can induce double-rounding which may alter the results. We can
11784 // correct for this using a trick explained in: Boldo, Sylvie, and
11785 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11786 // World Congress. 2005.
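// Rounding to odd keeps the sticky information in the lowest bit of the
// intermediate value: the subsequent round-to-nearest can then never hit
// a false tie, which is what makes the double rounding harmless.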
11787 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11788 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11789
11790 // We can keep the narrow value as-is if narrowing was exact (no
11791 // rounding error), the wide value was NaN (the narrow value is also
11792 // NaN and should be preserved) or if we rounded to the odd value.
11793 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11794 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11795 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11796 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11797 EVT ResultIntVTCCVT = getSetCCResultType(
11798 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11799 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11800 // The result is already odd so we don't need to do anything.
11801 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11802
11803 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11804 Op.getValueType());
11805 // We keep results which are exact, odd or NaN.
11806 SDValue KeepNarrow =
11807 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11808 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11809 // We morally performed a round-down if AbsNarrow is smaller than
11810 // AbsWide.
11811 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11812 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11813 SDValue NarrowIsRd =
11814 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11815 // If the narrow value is odd or exact, pick it.
11816 // Otherwise, narrow is even and corresponds to either the rounded-up
11817 // or rounded-down value. If narrow is the rounded-down value, we want
11818 // the rounded-up value as it will be odd.
11819 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11820 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11821 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11822 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11823}
11824
11826 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11827 SDValue Op = Node->getOperand(0);
11828 EVT VT = Node->getValueType(0);
11829 SDLoc dl(Node);
11830 if (VT.getScalarType() == MVT::bf16) {
11831 if (Node->getConstantOperandVal(1) == 1) {
11832 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11833 }
11834 EVT OperandVT = Op.getValueType();
11835 SDValue IsNaN = DAG.getSetCC(
11836 dl,
11837 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11838 Op, Op, ISD::SETUO);
11839
11840 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11841 // can induce double-rounding which may alter the results. We can
11842 // correct for this using a trick explained in: Boldo, Sylvie, and
11843 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11844 // World Congress. 2005.
11845 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11846 EVT I32 = F32.changeTypeToInteger();
11847 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11848 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11849
11850 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11851 // turning into infinities.
11852 SDValue NaN =
11853 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11854
11855 // Factor in the contribution of the low 16 bits.
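// Adding 0x7fff plus the current bit 16 implements round-to-nearest-even:
// low halves below 0x8000 are dropped, those above 0x8000 carry into
// bit 16, and an exact 0x8000 tie carries only when bit 16 is already odd.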
11856 SDValue One = DAG.getConstant(1, dl, I32);
11857 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11858 DAG.getShiftAmountConstant(16, I32, dl));
11859 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11860 SDValue RoundingBias =
11861 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11862 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11863
11864 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11865 // 0x80000000.
11866 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11867
11868 // Now that we have rounded, shift the bits into position.
11869 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11870 DAG.getShiftAmountConstant(16, I32, dl));
11871 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11872 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11873 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11874 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11875 }
11876 return SDValue();
11877}
11878
11879SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11880 SelectionDAG &DAG) const {
11881 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11882 assert(Node->getValueType(0).isScalableVector() &&
11883 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11884
11885 EVT VT = Node->getValueType(0);
11886 SDValue V1 = Node->getOperand(0);
11887 SDValue V2 = Node->getOperand(1);
11888 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11889 SDLoc DL(Node);
11890
11891 // Expand through memory thusly:
11892 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11893 // Store V1, Ptr
11894 // Store V2, Ptr + sizeof(V1)
11895 // If (Imm < 0)
11896 // TrailingElts = -Imm
11897 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11898 // else
11899 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11900 // Res = Load Ptr
11901
11902 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11903
11904 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11905 VT.getVectorElementCount() * 2);
11906 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11907 EVT PtrVT = StackPtr.getValueType();
11908 auto &MF = DAG.getMachineFunction();
11909 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11910 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11911
11912 // Store the lo part of CONCAT_VECTORS(V1, V2)
11913 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11914 // Store the hi part of CONCAT_VECTORS(V1, V2)
11915 SDValue OffsetToV2 = DAG.getVScale(
11916 DL, PtrVT,
11917 APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11918 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11919 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11920
11921 if (Imm >= 0) {
11922 // Load back the required element. getVectorElementPointer takes care of
11923 // clamping the index if it's out-of-bounds.
11924 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11925 // Load the spliced result
11926 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11927 MachinePointerInfo::getUnknownStack(MF));
11928 }
11929
11930 uint64_t TrailingElts = -Imm;
11931
11932 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11933 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11934 SDValue TrailingBytes =
11935 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11936
11937 if (TrailingElts > VT.getVectorMinNumElements()) {
11938 SDValue VLBytes =
11939 DAG.getVScale(DL, PtrVT,
11940 APInt(PtrVT.getFixedSizeInBits(),
11941 VT.getStoreSize().getKnownMinValue()));
11942 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11943 }
11944
11945 // Calculate the start address of the spliced result.
11946 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11947
11948 // Load the spliced result
11949 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11951}
11952
11953SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11954 SelectionDAG &DAG) const {
11955 SDLoc DL(Node);
11956 SDValue Vec = Node->getOperand(0);
11957 SDValue Mask = Node->getOperand(1);
11958 SDValue Passthru = Node->getOperand(2);
11959
11960 EVT VecVT = Vec.getValueType();
11961 EVT ScalarVT = VecVT.getScalarType();
11962 EVT MaskVT = Mask.getValueType();
11963 EVT MaskScalarVT = MaskVT.getScalarType();
11964
11965 // Needs to be handled by targets that have scalable vector types.
11966 if (VecVT.isScalableVector())
11967 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11968
11969 SDValue StackPtr = DAG.CreateStackTemporary(
11970 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11971 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11972 MachinePointerInfo PtrInfo =
11973 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11974
11975 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11976 SDValue Chain = DAG.getEntryNode();
11977 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11978
11979 bool HasPassthru = !Passthru.isUndef();
11980
11981 // If we have a passthru vector, store it on the stack, overwrite the matching
11982 // positions and then re-write the last element that was potentially
11983 // overwritten even though mask[i] = false.
11984 if (HasPassthru)
11985 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11986
11987 SDValue LastWriteVal;
11988 APInt PassthruSplatVal;
11989 bool IsSplatPassthru =
11990 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11991
11992 if (IsSplatPassthru) {
11993 // As we do not know which position we wrote to last, we cannot simply
11994 // access that index from the passthru vector. So we first check if passthru
11995 // is a splat vector, to use any element ...
11996 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11997 } else if (HasPassthru) {
11998 // ... if it is not a splat vector, we need to get the passthru value at
11999 // position = popcount(mask) and re-load it from the stack before it is
12000 // overwritten in the loop below.
12001 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12002 SDValue Popcount = DAG.getNode(
12003 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
12004 Popcount =
12005 DAG.getNode(ISD::ZERO_EXTEND, DL,
12006 MaskVT.changeVectorElementType(PopcountVT), Popcount);
12007 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12008 SDValue LastElmtPtr =
12009 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12010 LastWriteVal = DAG.getLoad(
12011 ScalarVT, DL, Chain, LastElmtPtr,
12012 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12013 Chain = LastWriteVal.getValue(1);
12014 }
12015
12016 unsigned NumElms = VecVT.getVectorNumElements();
12017 for (unsigned I = 0; I < NumElms; I++) {
12018 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12019 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12020 Chain = DAG.getStore(
12021 Chain, DL, ValI, OutPtr,
12022 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12023
12024 // Get the mask value and add it to the current output position. This
12025 // either increments by 1 if MaskI is true or adds 0 otherwise.
12026 // Freeze in case we have poison/undef mask entries.
12027 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12028 MaskI = DAG.getFreeze(MaskI);
12029 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12030 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12031 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12032
12033 if (HasPassthru && I == NumElms - 1) {
12034 SDValue EndOfVector =
12035 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12036 SDValue AllLanesSelected =
12037 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12038 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12039 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12040
12041 // Re-write the last ValI if all lanes were selected. Otherwise,
12042 // overwrite the last write with the passthru value.
12043 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12044 LastWriteVal, SDNodeFlags::Unpredictable);
12045 Chain = DAG.getStore(
12046 Chain, DL, LastWriteVal, OutPtr,
12047 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12048 }
12049 }
12050
12051 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12052}
12053
12054SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12055 SelectionDAG &DAG) const {
12056 SDLoc DL(N);
12057 SDValue Acc = N->getOperand(0);
12058 SDValue MulLHS = N->getOperand(1);
12059 SDValue MulRHS = N->getOperand(2);
12060 EVT AccVT = Acc.getValueType();
12061 EVT MulOpVT = MulLHS.getValueType();
12062
12063 EVT ExtMulOpVT =
12064 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12065 MulOpVT.getVectorElementCount());
12066
12067 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12068 ? ISD::ZERO_EXTEND
12069 : ISD::SIGN_EXTEND;
12070 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12071 ? ISD::SIGN_EXTEND
12072 : ISD::ZERO_EXTEND;
12073
12074 if (ExtMulOpVT != MulOpVT) {
12075 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12076 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12077 }
12078 SDValue Input = MulLHS;
12079 APInt ConstantOne;
12080 if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
12081 !ConstantOne.isOne())
12082 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12083
12084 unsigned Stride = AccVT.getVectorMinNumElements();
12085 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12086
12087 // Collect all of the subvectors
12088 std::deque<SDValue> Subvectors = {Acc};
12089 for (unsigned I = 0; I < ScaleFactor; I++)
12090 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12091
12092 // Flatten the subvector tree
12093 while (Subvectors.size() > 1) {
12094 Subvectors.push_back(
12095 DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12096 Subvectors.pop_front();
12097 Subvectors.pop_front();
12098 }
12099
12100 assert(Subvectors.size() == 1 &&
12101 "There should only be one subvector after tree flattening");
12102
12103 return Subvectors[0];
12104}
12105
12107 SDValue &LHS, SDValue &RHS,
12108 SDValue &CC, SDValue Mask,
12109 SDValue EVL, bool &NeedInvert,
12110 const SDLoc &dl, SDValue &Chain,
12111 bool IsSignaling) const {
12112 MVT OpVT = LHS.getSimpleValueType();
12113 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12114 NeedInvert = false;
12115 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12116 bool IsNonVP = !EVL;
12117 switch (getCondCodeAction(CCCode, OpVT)) {
12118 default:
12119 llvm_unreachable("Unknown condition code action!");
12120 case TargetLowering::Legal:
12121 // Nothing to do.
12122 break;
12123 case TargetLowering::Expand: {
12124 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12125 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12126 std::swap(LHS, RHS);
12127 CC = DAG.getCondCode(InvCC);
12128 return true;
12129 }
12130 // Swapping operands didn't work. Try inverting the condition.
12131 bool NeedSwap = false;
12132 InvCC = getSetCCInverse(CCCode, OpVT);
12133 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12134 // If inverting the condition is not enough, try swapping operands
12135 // on top of it.
12136 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12137 NeedSwap = true;
12138 }
12139 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12140 CC = DAG.getCondCode(InvCC);
12141 NeedInvert = true;
12142 if (NeedSwap)
12143 std::swap(LHS, RHS);
12144 return true;
12145 }
12146
12147 // Special case: expand i1 comparisons using logical operations.
12148 if (OpVT == MVT::i1) {
12149 SDValue Ret;
12150 switch (CCCode) {
12151 default:
12152 llvm_unreachable("Unknown integer setcc!");
12153 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12154 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12155 MVT::i1);
12156 break;
12157 case ISD::SETNE: // X != Y --> (X ^ Y)
12158 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12159 break;
12160 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12161 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12162 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12163 DAG.getNOT(dl, LHS, MVT::i1));
12164 break;
12165 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12166 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12167 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12168 DAG.getNOT(dl, RHS, MVT::i1));
12169 break;
12170 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12171 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12172 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12173 DAG.getNOT(dl, LHS, MVT::i1));
12174 break;
12175 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12176 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12177 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12178 DAG.getNOT(dl, RHS, MVT::i1));
12179 break;
12180 }
12181
12182 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12183 RHS = SDValue();
12184 CC = SDValue();
12185 return true;
12186 }
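// Sanity check of one identity above: on i1 the signed values are 0 and -1,
// so "X <s Y" holds exactly when X is -1 (bit set) and Y is 0, which is the
// computed X & ~Y.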
12187
12188 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12189 unsigned Opc = 0;
12190 switch (CCCode) {
12191 default:
12192 llvm_unreachable("Don't know how to expand this condition!");
12193 case ISD::SETUO:
12194 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12195 CC1 = ISD::SETUNE;
12196 CC2 = ISD::SETUNE;
12197 Opc = ISD::OR;
12198 break;
12199 }
12201 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12202 NeedInvert = true;
12203 [[fallthrough]];
12204 case ISD::SETO:
12206 "If SETO is expanded, SETOEQ must be legal!");
12207 CC1 = ISD::SETOEQ;
12208 CC2 = ISD::SETOEQ;
12209 Opc = ISD::AND;
12210 break;
12211 case ISD::SETONE:
12212 case ISD::SETUEQ:
12213 // If the SETUO or SETO CC isn't legal, we might be able to use
12214 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12215 // of SETOGT/SETOLT to be legal; the other can be emulated by swapping
12216 // the operands.
12217 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12218 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12219 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12220 CC1 = ISD::SETOGT;
12221 CC2 = ISD::SETOLT;
12222 Opc = ISD::OR;
12223 NeedInvert = ((unsigned)CCCode & 0x8U);
12224 break;
12225 }
12226 [[fallthrough]];
12227 case ISD::SETOEQ:
12228 case ISD::SETOGT:
12229 case ISD::SETOGE:
12230 case ISD::SETOLT:
12231 case ISD::SETOLE:
12232 case ISD::SETUNE:
12233 case ISD::SETUGT:
12234 case ISD::SETUGE:
12235 case ISD::SETULT:
12236 case ISD::SETULE:
12237 // If we are floating point, assign and break, otherwise fall through.
12238 if (!OpVT.isInteger()) {
12239 // We can use the 4th bit to tell if we are the unordered
12240 // or ordered version of the opcode.
12241 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12242 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12243 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12244 break;
12245 }
12246 // Fallthrough if we are unsigned integer.
12247 [[fallthrough]];
12248 case ISD::SETLE:
12249 case ISD::SETGT:
12250 case ISD::SETGE:
12251 case ISD::SETLT:
12252 case ISD::SETNE:
12253 case ISD::SETEQ:
12254 // If all combinations of inverting the condition and swapping operands
12255 // didn't work then we have no means to expand the condition.
12256 llvm_unreachable("Don't know how to expand this condition!");
12257 }
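// Examples of the expansions selected above: SETOLT becomes
// (LHS SETLT RHS) AND (LHS SETO RHS); SETUEQ, when SETUO is illegal but
// SETOGT or SETOLT is legal, becomes (LHS SETOGT RHS) OR (LHS SETOLT RHS)
// with NeedInvert set to negate the result.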
12258
12259 SDValue SetCC1, SetCC2;
12260 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12261 // If this is not the ordered or unordered (SETO / SETUO) comparison,
12262 // the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12263 if (IsNonVP) {
12264 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12265 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12266 } else {
12267 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12268 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12269 }
12270 } else {
12271 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12272 if (IsNonVP) {
12273 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12274 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12275 } else {
12276 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12277 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12278 }
12279 }
12280 if (Chain)
12281 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12282 SetCC2.getValue(1));
12283 if (IsNonVP)
12284 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12285 else {
12286 // Transform the binary opcode to the VP equivalent.
12287 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12288 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12289 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12290 }
12291 RHS = SDValue();
12292 CC = SDValue();
12293 return true;
12294 }
12295 }
12296 return false;
12297}
12298
12299SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12300 SelectionDAG &DAG) const {
12301 EVT VT = Node->getValueType(0);
12302 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12303 // split into two equal parts.
12304 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12305 return SDValue();
12306
12307 // Restrict expansion to cases where both parts can be concatenated.
12308 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12309 if (LoVT != HiVT || !isTypeLegal(LoVT))
12310 return SDValue();
12311
12312 SDLoc DL(Node);
12313 unsigned Opcode = Node->getOpcode();
12314
12315 // Don't expand if the result is likely to be unrolled anyway.
12316 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12317 return SDValue();
12318
12319 SmallVector<SDValue, 4> LoOps, HiOps;
12320 for (const SDValue &V : Node->op_values()) {
12321 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12322 LoOps.push_back(Lo);
12323 HiOps.push_back(Hi);
12324 }
12325
12326 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12327 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12328 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12329}
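// For example (hypothetical types), an ISD::ADD on v8i16 for which only
// v4i16 is legal becomes:
//   concat_vectors (add LHS.lo, RHS.lo), (add LHS.hi, RHS.hi)
// where lo/hi are the halves returned by SplitVector.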
12330
12331SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12332 const SDLoc &DL,
12333 EVT InVecVT, SDValue EltNo,
12334 LoadSDNode *OriginalLoad,
12335 SelectionDAG &DAG) const {
12336 assert(OriginalLoad->isSimple());
12337
12338 EVT VecEltVT = InVecVT.getVectorElementType();
12339
12340 // If the vector element type is not a multiple of a byte then we are unable
12341 // to correctly compute an address to load only the extracted element as a
12342 // scalar.
12343 if (!VecEltVT.isByteSized())
12344 return SDValue();
12345
12346 ISD::LoadExtType ExtTy =
12347 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12348 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12349 return SDValue();
12350
12351 std::optional<unsigned> ByteOffset;
12352 Align Alignment = OriginalLoad->getAlign();
12353 MachinePointerInfo MPI;
12354 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12355 int Elt = ConstEltNo->getZExtValue();
12356 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12357 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12358 Alignment = commonAlignment(Alignment, *ByteOffset);
12359 } else {
12360 // Discard the pointer info except the address space because the memory
12361 // operand can't represent this new access since the offset is variable.
12362 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12363 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12364 }
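// For example, extracting element 2 of a loaded v4i32 yields ByteOffset == 8,
// and a 16-byte-aligned original load gives commonAlignment(16, 8) == 8 for
// the scalar access. With a variable index, only the address space is kept
// and the alignment is clamped to at most the 4-byte element size.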
12365
12366 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12367 return SDValue();
12368
12369 unsigned IsFast = 0;
12370 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12371 OriginalLoad->getAddressSpace(), Alignment,
12372 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12373 !IsFast)
12374 return SDValue();
12375
12376 SDValue NewPtr =
12377 getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12378
12379 // We are replacing a vector load with a scalar load. The new load must have
12380 // identical memory op ordering to the original.
12381 SDValue Load;
12382 if (ResultVT.bitsGT(VecEltVT)) {
12383 // If the result type of vextract is wider than the load, then issue an
12384 // extending load instead.
12385 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12386 ? ISD::ZEXTLOAD
12387 : ISD::EXTLOAD;
12388 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12389 NewPtr, MPI, VecEltVT, Alignment,
12390 OriginalLoad->getMemOperand()->getFlags(),
12391 OriginalLoad->getAAInfo());
12392 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12393 } else {
12394 // The result type is narrower or the same width as the vector element
12395 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12396 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12397 OriginalLoad->getAAInfo());
12398 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12399 if (ResultVT.bitsLT(VecEltVT))
12400 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12401 else
12402 Load = DAG.getBitcast(ResultVT, Load);
12403 }
12404
12405 return Load;
12406}
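// Overall effect (hypothetical example):
//   (i32 (extract_vector_elt (load <4 x i32>, %p), 2))
// becomes a scalar (load i32, %p + 8) with matching chain and memory
// ordering; a wider ResultVT instead yields a zero- or any-extending load.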
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1347
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1158
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1098
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1109
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1391
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1396
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:834
void negate()
Negate this APInt in place.
Definition: APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
unsigned countLeadingZeros() const
Definition: APInt.h:1606
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
unsigned logBase2() const
Definition: APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:827
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1319
LLVM_ABI APInt multiplicativeInverse() const
Definition: APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition: APInt.h:1417
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1442
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1137
LLVM_ABI bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
LLVM_ABI AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
LLVM_ABI bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This class represents a range of values.
Definition: ConstantRange.h:47
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:198
bool isBigEndian() const
Definition: DataLayout.h:199
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
iterator end() const
Definition: ArrayRef.h:348
iterator begin() const
Definition: ArrayRef.h:347
Class to represent pointers.
Definition: DerivedTypes.h:700
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
Definition: SelectionDAG.h:941
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
Definition: SelectionDAG.h:963
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:902
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
LLVM_ABI std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:918
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
iterator end() const
Definition: StringRef.h:122
Class to represent struct types.
Definition: DerivedTypes.h:218
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
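In scalar terms, the signed-overflow flag falls out of sign arithmetic: overflow happened iff both operands share a sign and the result's sign differs from it. A self-contained C++ sketch for the SADDO case:

#include <cstdint>

int32_t saddo32(int32_t A, int32_t B, bool &Overflow) {
  uint32_t Sum = (uint32_t)A + (uint32_t)B;  // wrapping add
  // Overflow iff A and B agree in sign but Sum does not.
  Overflow = ((~((uint32_t)A ^ (uint32_t)B)) & ((uint32_t)A ^ Sum)) >> 31;
  return (int32_t)Sum;  // two's-complement reinterpretation
}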
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
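A typical override simply maps the target's own opcodes to strings for DAG dumps. MyTargetLowering and the MyISD opcodes below are hypothetical names, shown only to illustrate the hook:

const char *MyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  case MyISD::CALL:     return "MyISD::CALL";
  case MyISD::RET_GLUE: return "MyISD::RET_GLUE";
  default:              return nullptr; // unknown: generic printing kicks in
  }
}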
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
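The classic parallel bit-count that this expansion builds, shown as a self-contained C++ sketch: sum adjacent groups of 2, 4, and 8 bits, then add the byte sums with one multiply.

#include <cstdint>

uint32_t ctpop32(uint32_t X) {
  X = X - ((X >> 1) & 0x55555555u);                 // 2-bit partial sums
  X = (X & 0x33333333u) + ((X >> 2) & 0x33333333u); // 4-bit partial sums
  X = (X + (X >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (X * 0x01010101u) >> 24;                   // add the four byte sums
}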
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
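The core trick, shown for division by 3: multiply by a precomputed fixed-point reciprocal and keep the high bits. A worked scalar example of what the DAG version builds with MULHU:

#include <cstdint>

uint32_t udiv3(uint32_t X) {
  // 0xAAAAAAAB is ceil(2^33 / 3); shifting the 64-bit product right by 33
  // yields exactly X / 3 for every 32-bit X.
  return (uint32_t)(((uint64_t)X * 0xAAAAAAABu) >> 33);
}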
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
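For a 32-bit value the expansion amounts to four shifts, two masks, and three ORs, as in this self-contained C++ sketch:

#include <cstdint>

uint32_t bswap32(uint32_t X) {
  return (X << 24) | ((X & 0xFF00u) << 8) |
         ((X >> 8) & 0xFF00u) | (X >> 24);
}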
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
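The lookup uses a de Bruijn sequence: isolating the lowest set bit and multiplying by the constant places a unique 5-bit pattern in the top bits, which indexes a 32-entry table. A self-contained C++ sketch:

#include <cstdint>

unsigned cttz32(uint32_t X) { // X must be nonzero
  static const unsigned Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  // X & -X isolates the lowest set bit; the de Bruijn multiply moves a
  // unique index for that bit position into the top five bits.
  return Table[((X & (0u - X)) * 0x077CB531u) >> 27];
}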
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a true value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
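In scalar terms, a fixed-point divide with Scale fractional bits pre-shifts the dividend so the quotient keeps those fractional bits. A non-saturating C++ sketch, assuming the widened dividend does not overflow and B is nonzero:

#include <cstdint>

int32_t sdivfix32(int32_t A, int32_t B, unsigned Scale) {
  // Widen first so the pre-shift cannot lose bits.
  return (int32_t)(((int64_t)A << Scale) / B);
}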
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
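The expansion computes a three-way compare from two setcc results, as in this self-contained C++ sketch:

#include <cstdint>

int scmp32(int32_t A, int32_t B) {
  return (A > B) - (A < B); // -1 if less, 0 if equal, +1 if greater
}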
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
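A scalar sketch of the idea for SHL, splitting a 64-bit shift into 32-bit halves. The branching below is for clarity; the actual DAG expansion computes both arms and chooses with selects. Assumes Amt < 64:

#include <cstdint>

void shl64parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt == 0) {
    OutLo = Lo;
    OutHi = Hi;
  } else if (Amt < 32) {
    OutLo = Lo << Amt;
    OutHi = (Hi << Amt) | (Lo >> (32 - Amt)); // bits that cross the seam
  } else {
    OutLo = 0;
    OutHi = Lo << (Amt - 32); // everything comes from the low half
  }
}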
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, return whether Op is known never to be any NaN; if SNaN is true, whether it is known never to be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
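For unsigned addition the overflow flag is just a wrap check, as in this self-contained C++ sketch:

#include <cstdint>

uint32_t uaddo32(uint32_t A, uint32_t B, bool &Overflow) {
  uint32_t Sum = A + B;  // wrapping add
  Overflow = Sum < A;    // unsigned overflow iff the sum wrapped around
  return Sum;
}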
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
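The default lowering biases negative dividends so the arithmetic shift rounds toward zero, matching sdiv semantics. A self-contained C++ sketch for X / 2^K (assumes K < 31; X >> 31 relies on arithmetic shift, which C++20 guarantees for signed types):

#include <cstdint>

int32_t sdivPow2(int32_t X, unsigned K) {
  // For negative X, add 2^K - 1 so the shift truncates toward zero.
  int32_t Bias = (X >> 31) & ((1 << K) - 1);
  return (X + Bias) >> K;
}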
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
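The unsigned expansions avoid widening by splitting each input into shared and differing bits, as in this self-contained C++ sketch:

#include <cstdint>

uint32_t avgflooru(uint32_t A, uint32_t B) {
  // Common bits plus half the differing bits; never overflows.
  return (A & B) + ((A ^ B) >> 1);
}

uint32_t avgceilu(uint32_t A, uint32_t B) {
  return (A | B) - ((A ^ B) >> 1);
}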
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:771
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
LLVM_ABI const fltSemantics & getFltSemantics() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:184
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition: APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:774
@ PARTIAL_REDUCE_SMLA
Definition: ISDOpcodes.h:1510
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:387
@ ConstantFP
Definition: ISDOpcodes.h:87
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ PARTIAL_REDUCE_UMLA
Definition: ISDOpcodes.h:1511
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:773
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ SSUBO
Same as SADDO, but for subtraction.
Definition: ISDOpcodes.h:347
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1162
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1492
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:343
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:452
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:453
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:379
@ SMULO
Same as [SU]ADDO, but for multiplication.
Definition: ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:406
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ AssertZext
Definition: ISDOpcodes.h:63
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1086
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1743
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1748
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1718
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates. If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the same constant or undefined, return true and return the constant value in SplatValue.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1724
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
@ Offset
Definition: DWP.cpp:477
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions should be required).
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:295
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition: Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1782
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
LLVM_ABI bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1665
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1569
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition: Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:308
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
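A small usage sketch (illustrative only): EVT covers both simple MVTs and extended types such as odd-width integers.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void evtExamples(LLVMContext &Ctx) {
  EVT I24 = EVT::getIntegerVT(Ctx, 24);           // extended type: i24
  EVT V4F32 = EVT::getVectorVT(Ctx, MVT::f32, 4); // simple type: v4f32
  (void)I24;
  (void)V4F32;
}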
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:330
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:128
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:138
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:156
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:163
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:152
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:142
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:294
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:179
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:248
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:211
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:101
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:66
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:154
KnownBits byteSwap() const
Definition: KnownBits.h:507
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:535
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:282
KnownBits reverseBits() const
Definition: KnownBits.h:511
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:226
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:187
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:314
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition: KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:173
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:241
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:501
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:541
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:60
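An illustrative use of the API: if X's low bit is known clear, then X + 1 has its low bit known set, and computeForAddSub can prove it.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

void knownBitsExample() {
  KnownBits X(8);
  X.Zero = 0x01; // bit 0 known zero, all other bits unknown
  KnownBits One = KnownBits::makeConstant(APInt(8, 1));
  KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                              /*NUW=*/false, X, One);
  // Sum.One now has bit 0 set: an even number plus one is odd.
}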
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:517
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:521
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:98
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:803
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:160
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:545
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:525
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:279
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:511
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:205
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...