//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include <cctype>
#include <deque>
using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
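
// Illustrative usage sketch (not part of the original source; the locals are
// assumed to exist in a target's libcall lowering): query tail-call
// eligibility and, on success, reuse the incoming chain for the tail call.
//
//   SDValue TCChain = InChain;
//   bool IsTailCall = isInTailCallPosition(DAG, Node, TCChain);
//   if (IsTailCall)
//     InChain = TCChain;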

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
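
// Illustrative sketch (assumptions: CCAssignFnForCall is a target-specific
// hook and the surrounding locals exist in a target's sibcall check):
//
//   SmallVector<CCValAssign, 16> ArgLocs;
//   CCState CCInfo(CalleeCC, /*IsVarArg=*/false, MF, ArgLocs, Ctx);
//   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC));
//   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallerCC);
//   if (!parametersInCSRMatch(MF.getRegInfo(), Mask, ArgLocs, OutVals))
//     return false; // a callee-saved argument register is not passed through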

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
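
// Worked example (illustrative, not from the original source): for a call
// argument declared "ptr byval(%struct.S) align 8 %p", this sets IsByVal,
// IndirectType = %struct.S, and (absent an explicit stack alignment)
// Alignment = align 8 from the parameter's align attribute.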

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  const char *LibcallName = getLibcallName(LC);
  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
    reportFatalInternalError("unsupported library call operation");

  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
                    std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
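
// Illustrative usage sketch (not from the original source): expanding an f32
// frem through the fmodf libcall. RTLIB::REM_F32 is real; the locals (N) are
// assumed to exist in the caller.
//
//   SDValue Ops[2] = {N->getOperand(0), N->getOperand(1)};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions, SDLoc(N));
//   // Res.first is the call result, Res.second is the output chain.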

bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
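
// Worked example (illustrative): a 15-byte memcpy on a 64-bit target with no
// fixed destination alignment typically yields MemOps = {i64, i32, i16, i8}
// (8 + 4 + 2 + 1 bytes); when Op.allowOverlap() holds, the 7-byte tail can
// instead be covered by one additional i64 store that overlaps the first,
// giving just two operations.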

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    reportFatalUsageError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      reportFatalUsageError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
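
// Worked example (illustrative): softening (setcc f32 %a, %b, setueq) emits
// two libcalls (libgcc names shown):
//   t1 = __unordsf2(a, b)   ; nonzero iff a or b is NaN
//   t2 = __eqsf2(a, b)      ; zero iff a == b (ordered)
// and the result is folded as (t1 != 0) | (t2 == 0).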

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}
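
// Worked example (illustrative): with a static relocation model the jump
// table stores absolute block addresses (EK_BlockAddress); under PIC it
// stores 32-bit label differences (EK_LabelDifference32) that the indirect
// branch sequence adds back to the relocation base returned above.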

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
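
// Worked example (illustrative): with a static relocation model and a
// dso-local global @g, a GlobalAddress of @g plus a constant offset can be
// folded into one absolute address; under PIC the address comes from a GOT
// load or a base register, so the offset cannot be folded into the node.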

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
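
// Worked example (illustrative): for (or X, 0xFF) where only bit 0 is
// demanded, C = 0xFF is not a subset of DemandedBits = 0x1, so the constant
// shrinks to DemandedBits & C = 0x1 and the node is rebuilt as (or X, 1),
// clearing the dead constant bits.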

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
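
// Worked example (illustrative): for an i64 (or disjoint X, Y) where only the
// low 16 bits are demanded and i16<->i64 casts are free, the node is rebuilt
// as
//   (any_extend i64 (or disjoint (trunc i16 X), (trunc i16 Y)))
// DemandedSize = 16, so the search starts at SmallVT = i16.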

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0
    // or -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
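
// Worked example (illustrative): given t = (and X, 0xFF) that has other
// users, a caller demanding only bits 0..7 gets X back directly: those bits
// are known one on the RHS, so the 'and' contributes nothing. The node t is
// bypassed for this use without being rewritten, leaving its other users
// untouched.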

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
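
// Worked example (illustrative): with i8 values A and B zero-extended to i16,
//   (srl (add (zext A), (zext B)), 1)          --> (zext (avgflooru A, B))
//   (srl (add (add (zext A), (zext B)), 1), 1) --> (zext (avgceilu A, B))
// when AVGFLOORU/AVGCEILU is legal for i8 (or the adds are known not to
// overflow), since both operands have at least 8 known-zero high bits.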

/// Look at Op. At this point, we know that only the OriginalDemandedBits of
/// the result of Op are ever used downstream. If we can use this information
/// to simplify Op, create a new simplified DAG node and return true,
/// returning the original and new nodes in Old and New. Otherwise, analyze
/// the expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.setAllConflict();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.setAllConflict();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.setAllConflict();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
1615 case ISD::XOR: {
1616 SDValue Op0 = Op.getOperand(0);
1617 SDValue Op1 = Op.getOperand(1);
1618
1619 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1620 Depth + 1))
1621 return true;
1622 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1623 Depth + 1))
1624 return true;
1625
1626 // If all of the demanded bits are known zero on one side, return the other.
1627 // These bits cannot contribute to the result of the 'xor'.
1628 if (DemandedBits.isSubsetOf(Known.Zero))
1629 return TLO.CombineTo(Op, Op0);
1630 if (DemandedBits.isSubsetOf(Known2.Zero))
1631 return TLO.CombineTo(Op, Op1);
1632 // If the operation can be done in a smaller type, do so.
1634 return true;
1635
1636 // If all of the unknown bits are known to be zero on one side or the other
1637 // turn this into an *inclusive* or.
1638 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1639 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1640 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1641
1642 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1643 if (C) {
1644 // If one side is a constant, and all of the set bits in the constant are
1645 // also known set on the other side, turn this into an AND, as we know
1646 // the bits will be cleared.
1647 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1648 // NB: it is okay if more bits are known than are requested
1649 if (C->getAPIntValue() == Known2.One) {
1650 SDValue ANDC =
1651 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1652 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1653 }
1654
1655 // If the RHS is a constant, see if we can change it. Don't alter a -1
1656 // constant because that's a 'not' op, and that is better for combining
1657 // and codegen.
1658 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1659 // We're flipping all demanded bits. Flip the undemanded bits too.
1660 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1661 return TLO.CombineTo(Op, New);
1662 }
1663
1664 unsigned Op0Opcode = Op0.getOpcode();
1665 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1666 if (ConstantSDNode *ShiftC =
1667 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1668 // Don't crash on an oversized shift. We can not guarantee that a
1669 // bogus shift has been simplified to undef.
1670 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1671 uint64_t ShiftAmt = ShiftC->getZExtValue();
1672 APInt Ones = APInt::getAllOnes(BitWidth);
1673 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1674 : Ones.lshr(ShiftAmt);
1675 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1676 isDesirableToCommuteXorWithShift(Op.getNode())) {
1677 // If the xor constant is a demanded mask, do a 'not' before the
1678 // shift:
1679 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1680 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
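// e.g. i8 xor (X << 4), 0xF0 --> (not X) << 4: the flipped bits are
// exactly the shifted-in bits of X; the zero low bits stay untouched.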
1681 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1682 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1683 Op0.getOperand(1)));
1684 }
1685 }
1686 }
1687 }
1688 }
1689
1690 // If we can't turn this into a 'not', try to shrink the constant.
1691 if (!C || !C->isAllOnes())
1692 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1693 return true;
1694
1695 // Attempt to avoid multi-use ops if we don't need anything from them.
1696 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1697 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1698 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1699 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1700 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1701 if (DemandedOp0 || DemandedOp1) {
1702 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1703 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1704 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1705 return TLO.CombineTo(Op, NewOp);
1706 }
1707 }
1708
1709 Known ^= Known2;
1710 break;
1711 }
1712 case ISD::SELECT:
1713 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1714 Known, TLO, Depth + 1))
1715 return true;
1716 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1717 Known2, TLO, Depth + 1))
1718 return true;
1719
1720 // If the operands are constants, see if we can simplify them.
1721 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1722 return true;
1723
1724 // Only known if known in both the LHS and RHS.
1725 Known = Known.intersectWith(Known2);
1726 break;
1727 case ISD::VSELECT:
1728 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1729 Known, TLO, Depth + 1))
1730 return true;
1731 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1732 Known2, TLO, Depth + 1))
1733 return true;
1734
1735 // Only known if known in both the LHS and RHS.
1736 Known = Known.intersectWith(Known2);
1737 break;
1738 case ISD::SELECT_CC:
1739 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1740 Known, TLO, Depth + 1))
1741 return true;
1742 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1743 Known2, TLO, Depth + 1))
1744 return true;
1745
1746 // If the operands are constants, see if we can simplify them.
1747 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1748 return true;
1749
1750 // Only known if known in both the LHS and RHS.
1751 Known = Known.intersectWith(Known2);
1752 break;
1753 case ISD::SETCC: {
1754 SDValue Op0 = Op.getOperand(0);
1755 SDValue Op1 = Op.getOperand(1);
1756 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1757 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1758 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1759 // -1, we may be able to bypass the setcc.
1760 if (DemandedBits.isSignMask() &&
1761 Op0.getScalarValueSizeInBits() == BitWidth &&
1762 getBooleanContents(Op0.getValueType()) ==
1763 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1764 // If we're testing X < 0, then this compare isn't needed - just use X!
1765 // FIXME: We're limiting to integer types here, but this should also work
1766 // if we don't care about FP signed-zero. The use of SETLT with FP means
1767 // that we don't care about NaNs.
1768 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1769 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1770 return TLO.CombineTo(Op, Op0);
1771
1772 // TODO: Should we check for other forms of sign-bit comparisons?
1773 // Examples: X <= -1, X >= 0
1774 }
1775 if (getBooleanContents(Op0.getValueType()) ==
1776 TargetLowering::ZeroOrOneBooleanContent &&
1777 BitWidth > 1)
1778 Known.Zero.setBitsFrom(1);
1779 break;
1780 }
1781 case ISD::SHL: {
1782 SDValue Op0 = Op.getOperand(0);
1783 SDValue Op1 = Op.getOperand(1);
1784 EVT ShiftVT = Op1.getValueType();
1785
1786 if (std::optional<unsigned> KnownSA =
1787 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1788 unsigned ShAmt = *KnownSA;
1789 if (ShAmt == 0)
1790 return TLO.CombineTo(Op, Op0);
1791
1792 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1793 // single shift. We can do this if the bottom bits (which are shifted
1794 // out) are never demanded.
1795 // TODO - support non-uniform vector amounts.
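// e.g. ((X >>u 3) << 5) --> (X << 2) when the low 5 bits are not
// demanded; the two forms differ only in those undemanded low bits.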
1796 if (Op0.getOpcode() == ISD::SRL) {
1797 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1798 if (std::optional<unsigned> InnerSA =
1799 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1800 unsigned C1 = *InnerSA;
1801 unsigned Opc = ISD::SHL;
1802 int Diff = ShAmt - C1;
1803 if (Diff < 0) {
1804 Diff = -Diff;
1805 Opc = ISD::SRL;
1806 }
1807 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1808 return TLO.CombineTo(
1809 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1810 }
1811 }
1812 }
1813
1814 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1815 // are not demanded. This will likely allow the anyext to be folded away.
1816 // TODO - support non-uniform vector amounts.
1817 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1818 SDValue InnerOp = Op0.getOperand(0);
1819 EVT InnerVT = InnerOp.getValueType();
1820 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1821 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1822 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1823 SDValue NarrowShl = TLO.DAG.getNode(
1824 ISD::SHL, dl, InnerVT, InnerOp,
1825 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1826 return TLO.CombineTo(
1827 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1828 }
1829
1830 // Repeat the SHL optimization above in cases where an extension
1831 // intervenes: (shl (anyext (shr x, c1)), c2) to
1832 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1833 // aren't demanded (as above) and that the shifted upper c1 bits of
1834 // x aren't demanded.
1835 // TODO - support non-uniform vector amounts.
1836 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1837 InnerOp.hasOneUse()) {
1838 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1839 InnerOp, DemandedElts, Depth + 2)) {
1840 unsigned InnerShAmt = *SA2;
1841 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1842 DemandedBits.getActiveBits() <=
1843 (InnerBits - InnerShAmt + ShAmt) &&
1844 DemandedBits.countr_zero() >= ShAmt) {
1845 SDValue NewSA =
1846 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1847 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1848 InnerOp.getOperand(0));
1849 return TLO.CombineTo(
1850 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1851 }
1852 }
1853 }
1854 }
1855
1856 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1857 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1858 Depth + 1)) {
1859 // Disable the nsw and nuw flags. We can no longer guarantee that we
1860 // won't wrap after simplification.
1861 Op->dropFlags(SDNodeFlags::NoWrap);
1862 return true;
1863 }
1864 Known <<= ShAmt;
1865 // low bits known zero.
1866 Known.Zero.setLowBits(ShAmt);
1867
1868 // Attempt to avoid multi-use ops if we don't need anything from them.
1869 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1870 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1871 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1872 if (DemandedOp0) {
1873 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1874 return TLO.CombineTo(Op, NewOp);
1875 }
1876 }
1877
1878 // TODO: Can we merge this fold with the one below?
1879 // Try shrinking the operation as long as the shift amount will still be
1880 // in range.
1881 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1882 Op.getNode()->hasOneUse()) {
1883 // Search for the smallest integer type with free casts to and from
1884 // Op's type. For expedience, just check power-of-2 integer types.
1885 unsigned DemandedSize = DemandedBits.getActiveBits();
1886 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1887 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1888 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1889 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1890 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1891 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1892 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1893 assert(DemandedSize <= SmallVTBits &&
1894 "Narrowed below demanded bits?");
1895 // We found a type with free casts.
1896 SDValue NarrowShl = TLO.DAG.getNode(
1897 ISD::SHL, dl, SmallVT,
1898 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1899 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1900 return TLO.CombineTo(
1901 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1902 }
1903 }
1904 }
1905
1906 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1907 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1908 // Only do this if we demand the upper half so the knownbits are correct.
1909 unsigned HalfWidth = BitWidth / 2;
1910 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1911 DemandedBits.countLeadingOnes() >= HalfWidth) {
1912 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1913 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1914 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1915 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1916 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1917 // If we're demanding the upper bits at all, we must ensure
1918 // that the upper bits of the shift result are known to be zero,
1919 // which is equivalent to the narrow shift being NUW.
1920 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1921 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1922 SDNodeFlags Flags;
1923 Flags.setNoSignedWrap(IsNSW);
1924 Flags.setNoUnsignedWrap(IsNUW);
1925 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1926 SDValue NewShiftAmt =
1927 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1928 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1929 NewShiftAmt, Flags);
1930 SDValue NewExt =
1931 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1932 return TLO.CombineTo(Op, NewExt);
1933 }
1934 }
1935 }
1936 } else {
1937 // This is a variable shift, so we can't shift the demand mask by a known
1938 // amount. But if we are not demanding high bits, then we are not
1939 // demanding those bits from the pre-shifted operand either.
1940 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1941 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1942 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1943 Depth + 1)) {
1944 // Disable the nsw and nuw flags. We can no longer guarantee that we
1945 // won't wrap after simplification.
1946 Op->dropFlags(SDNodeFlags::NoWrap);
1947 return true;
1948 }
1949 Known.resetAll();
1950 }
1951 }
1952
1953 // If we are only demanding sign bits then we can use the shift source
1954 // directly.
1955 if (std::optional<unsigned> MaxSA =
1956 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1957 unsigned ShAmt = *MaxSA;
1958 unsigned NumSignBits =
1959 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1960 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1961 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1962 return TLO.CombineTo(Op, Op0);
1963 }
1964 break;
1965 }
1966 case ISD::SRL: {
1967 SDValue Op0 = Op.getOperand(0);
1968 SDValue Op1 = Op.getOperand(1);
1969 EVT ShiftVT = Op1.getValueType();
1970
1971 if (std::optional<unsigned> KnownSA =
1972 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1973 unsigned ShAmt = *KnownSA;
1974 if (ShAmt == 0)
1975 return TLO.CombineTo(Op, Op0);
1976
1977 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1978 // single shift. We can do this if the top bits (which are shifted out)
1979 // are never demanded.
1980 // TODO - support non-uniform vector amounts.
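// e.g. ((X << 3) >>u 5) --> (X >>u 2) when the high 5 bits are not
// demanded; the two forms differ only in those undemanded high bits.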
1981 if (Op0.getOpcode() == ISD::SHL) {
1982 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1983 if (std::optional<unsigned> InnerSA =
1984 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1985 unsigned C1 = *InnerSA;
1986 unsigned Opc = ISD::SRL;
1987 int Diff = ShAmt - C1;
1988 if (Diff < 0) {
1989 Diff = -Diff;
1990 Opc = ISD::SHL;
1991 }
1992 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1993 return TLO.CombineTo(
1994 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1995 }
1996 }
1997 }
1998
1999 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2000 // single sra. We can do this if the top bits are never demanded.
2001 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2002 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2003 if (std::optional<unsigned> InnerSA =
2004 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2005 unsigned C1 = *InnerSA;
2006 // Clamp the combined shift amount if it exceeds the bit width.
2007 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2008 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2009 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2010 Op0.getOperand(0), NewSA));
2011 }
2012 }
2013 }
2014
2015 APInt InDemandedMask = (DemandedBits << ShAmt);
2016
2017 // If the shift is exact, then it does demand the low bits (and knows that
2018 // they are zero).
2019 if (Op->getFlags().hasExact())
2020 InDemandedMask.setLowBits(ShAmt);
2021
2022 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2023 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2024 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2025 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2026 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2027 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2028 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2029 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2030 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2031 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2032 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2033 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2034 SDValue NewShiftAmt =
2035 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2036 SDValue NewShift =
2037 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2038 return TLO.CombineTo(
2039 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2040 }
2041 }
2042
2043 // Compute the new bits that are at the top now.
2044 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2045 Depth + 1))
2046 return true;
2047 Known >>= ShAmt;
2048 // High bits known zero.
2049 Known.Zero.setHighBits(ShAmt);
2050
2051 // Attempt to avoid multi-use ops if we don't need anything from them.
2052 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2053 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2054 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2055 if (DemandedOp0) {
2056 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2057 return TLO.CombineTo(Op, NewOp);
2058 }
2059 }
2060 } else {
2061 // Use generic knownbits computation as it has support for non-uniform
2062 // shift amounts.
2063 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2064 }
2065
2066 // If we are only demanding sign bits then we can use the shift source
2067 // directly.
2068 if (std::optional<unsigned> MaxSA =
2069 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2070 unsigned ShAmt = *MaxSA;
2071 // The demanded bits must already be sign bits, and we can't demand any
2072 // of the shifted-in zeroes.
2073 if (DemandedBits.countl_zero() >= ShAmt) {
2074 unsigned NumSignBits =
2075 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2076 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2077 return TLO.CombineTo(Op, Op0);
2078 }
2079 }
2080
2081 // Try to match AVG patterns (after shift simplification).
2082 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2083 DemandedElts, Depth + 1))
2084 return TLO.CombineTo(Op, AVG);
2085
2086 break;
2087 }
2088 case ISD::SRA: {
2089 SDValue Op0 = Op.getOperand(0);
2090 SDValue Op1 = Op.getOperand(1);
2091 EVT ShiftVT = Op1.getValueType();
2092
2093 // If we only want bits that already match the signbit then we don't need
2094 // to shift.
2095 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2096 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2097 NumHiDemandedBits)
2098 return TLO.CombineTo(Op, Op0);
2099
2100 // If this is an arithmetic shift right and only the low-bit is set, we can
2101 // always convert this into a logical shr, even if the shift amount is
2102 // variable. The low bit of the shift cannot be an input sign bit unless
2103 // the shift amount is >= the size of the datatype, which is undefined.
2104 if (DemandedBits.isOne())
2105 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2106
2107 if (std::optional<unsigned> KnownSA =
2108 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2109 unsigned ShAmt = *KnownSA;
2110 if (ShAmt == 0)
2111 return TLO.CombineTo(Op, Op0);
2112
2113 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2114 // supports sext_inreg.
2115 if (Op0.getOpcode() == ISD::SHL) {
2116 if (std::optional<unsigned> InnerSA =
2117 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2118 unsigned LowBits = BitWidth - ShAmt;
2119 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2120 if (VT.isVector())
2121 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2122 VT.getVectorElementCount());
2124 if (*InnerSA == ShAmt) {
2125 if (!TLO.LegalOperations() ||
2126 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2127 return TLO.CombineTo(
2128 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2129 Op0.getOperand(0),
2130 TLO.DAG.getValueType(ExtVT)));
2131
2132 // Even if we can't convert to sext_inreg, we might be able to
2133 // remove this shift pair if the input is already sign extended.
2134 unsigned NumSignBits =
2135 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2136 if (NumSignBits > ShAmt)
2137 return TLO.CombineTo(Op, Op0.getOperand(0));
2138 }
2139 }
2140 }
2141
2142 APInt InDemandedMask = (DemandedBits << ShAmt);
2143
2144 // If the shift is exact, then it does demand the low bits (and knows that
2145 // they are zero).
2146 if (Op->getFlags().hasExact())
2147 InDemandedMask.setLowBits(ShAmt);
2148
2149 // If any of the demanded bits are produced by the sign extension, we also
2150 // demand the input sign bit.
2151 if (DemandedBits.countl_zero() < ShAmt)
2152 InDemandedMask.setSignBit();
2153
2154 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2155 Depth + 1))
2156 return true;
2157 Known >>= ShAmt;
2158
2159 // If the input sign bit is known to be zero, or if none of the top bits
2160 // are demanded, turn this into an unsigned shift right.
2161 if (Known.Zero[BitWidth - ShAmt - 1] ||
2162 DemandedBits.countl_zero() >= ShAmt) {
2163 SDNodeFlags Flags;
2164 Flags.setExact(Op->getFlags().hasExact());
2165 return TLO.CombineTo(
2166 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2167 }
2168
2169 int Log2 = DemandedBits.exactLogBase2();
2170 if (Log2 >= 0) {
2171 // The bit must come from the sign.
2172 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2173 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2174 }
2175
2176 if (Known.One[BitWidth - ShAmt - 1])
2177 // New bits are known one.
2178 Known.One.setHighBits(ShAmt);
2179
2180 // Attempt to avoid multi-use ops if we don't need anything from them.
2181 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2182 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2183 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2184 if (DemandedOp0) {
2185 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2186 return TLO.CombineTo(Op, NewOp);
2187 }
2188 }
2189 }
2190
2191 // Try to match AVG patterns (after shift simplification).
2192 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2193 DemandedElts, Depth + 1))
2194 return TLO.CombineTo(Op, AVG);
2195
2196 break;
2197 }
2198 case ISD::FSHL:
2199 case ISD::FSHR: {
2200 SDValue Op0 = Op.getOperand(0);
2201 SDValue Op1 = Op.getOperand(1);
2202 SDValue Op2 = Op.getOperand(2);
2203 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2204
2205 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2206 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2207
2208 // For fshl, 0-shift returns the 1st arg.
2209 // For fshr, 0-shift returns the 2nd arg.
2210 if (Amt == 0) {
2211 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2212 Known, TLO, Depth + 1))
2213 return true;
2214 break;
2215 }
2216
2217 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2218 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
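// e.g. i8 fshl(A, B, 3) = (A << 3) | (B >> 5), so demanding only result
// bit 6 demands bit 3 of A and no bits of B at all.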
2219 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2220 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2221 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2222 Depth + 1))
2223 return true;
2224 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2225 Depth + 1))
2226 return true;
2227
2228 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2229 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2230 Known = Known.unionWith(Known2);
2231
2232 // Attempt to avoid multi-use ops if we don't need anything from them.
2233 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2234 !DemandedElts.isAllOnes()) {
2235 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2236 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2237 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2238 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2239 if (DemandedOp0 || DemandedOp1) {
2240 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2241 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2242 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2243 DemandedOp1, Op2);
2244 return TLO.CombineTo(Op, NewOp);
2245 }
2246 }
2247 }
2248
2249 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2250 if (isPowerOf2_32(BitWidth)) {
2251 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2252 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2253 Known2, TLO, Depth + 1))
2254 return true;
2255 }
2256 break;
2257 }
2258 case ISD::ROTL:
2259 case ISD::ROTR: {
2260 SDValue Op0 = Op.getOperand(0);
2261 SDValue Op1 = Op.getOperand(1);
2262 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2263
2264 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2265 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2266 return TLO.CombineTo(Op, Op0);
2267
2268 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2269 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2270 unsigned RevAmt = BitWidth - Amt;
2271
2272 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2273 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2274 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2275 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2276 Depth + 1))
2277 return true;
2278
2279 // rot*(x, 0) --> x
2280 if (Amt == 0)
2281 return TLO.CombineTo(Op, Op0);
2282
2283 // See if we don't demand either half of the rotated bits.
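// e.g. i32 rotl(X, 8) --> (X << 8) if the low 8 bits are not demanded,
// since only those low bits receive the wrapped-around top bits of X.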
2284 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2285 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2286 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2287 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2288 }
2289 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2290 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2291 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2292 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2293 }
2294 }
2295
2296 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2297 if (isPowerOf2_32(BitWidth)) {
2298 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2299 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2300 Depth + 1))
2301 return true;
2302 }
2303 break;
2304 }
2305 case ISD::SMIN:
2306 case ISD::SMAX:
2307 case ISD::UMIN:
2308 case ISD::UMAX: {
2309 unsigned Opc = Op.getOpcode();
2310 SDValue Op0 = Op.getOperand(0);
2311 SDValue Op1 = Op.getOperand(1);
2312
2313 // If we're only demanding signbits, then we can simplify to OR/AND node.
2314 unsigned BitOp =
2315 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
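// For all-signbit (0/-1) operands: smin(A, B) == A | B, smax(A, B) ==
// A & B, and umin/umax take the opposite operator of smin/smax.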
2316 unsigned NumSignBits =
2317 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2318 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2319 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2320 if (NumSignBits >= NumDemandedUpperBits)
2321 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2322
2323 // Check if one arg is always less/greater than (or equal) to the other arg.
2324 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2325 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2326 switch (Opc) {
2327 case ISD::SMIN:
2328 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2329 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2330 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2331 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2332 Known = KnownBits::smin(Known0, Known1);
2333 break;
2334 case ISD::SMAX:
2335 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2336 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2337 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2338 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2339 Known = KnownBits::smax(Known0, Known1);
2340 break;
2341 case ISD::UMIN:
2342 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2343 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2344 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2345 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2346 Known = KnownBits::umin(Known0, Known1);
2347 break;
2348 case ISD::UMAX:
2349 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2350 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2351 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2352 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2353 Known = KnownBits::umax(Known0, Known1);
2354 break;
2355 }
2356 break;
2357 }
2358 case ISD::BITREVERSE: {
2359 SDValue Src = Op.getOperand(0);
2360 APInt DemandedSrcBits = DemandedBits.reverseBits();
2361 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2362 Depth + 1))
2363 return true;
2364 Known = Known2.reverseBits();
2365 break;
2366 }
2367 case ISD::BSWAP: {
2368 SDValue Src = Op.getOperand(0);
2369
2370 // If the only bits demanded come from one byte of the bswap result,
2371 // just shift the input byte into position to eliminate the bswap.
2372 unsigned NLZ = DemandedBits.countl_zero();
2373 unsigned NTZ = DemandedBits.countr_zero();
2374
2375 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2376 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2377 // have 14 leading zeros, round to 8.
2378 NLZ = alignDown(NLZ, 8);
2379 NTZ = alignDown(NTZ, 8);
2380 // If we need exactly one byte, we can do this transformation.
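// e.g. i32 bswap(X) with only bits 31..24 demanded: NLZ = 0, NTZ = 24,
// and the demanded byte is X's low byte, so bswap(X) --> X << 24.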
2381 if (BitWidth - NLZ - NTZ == 8) {
2382 // Replace this with either a left or right shift to get the byte into
2383 // the right place.
2384 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2385 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2386 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2387 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2388 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2389 return TLO.CombineTo(Op, NewOp);
2390 }
2391 }
2392
2393 APInt DemandedSrcBits = DemandedBits.byteSwap();
2394 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2395 Depth + 1))
2396 return true;
2397 Known = Known2.byteSwap();
2398 break;
2399 }
2400 case ISD::CTPOP: {
2401 // If only 1 bit is demanded, replace with PARITY as long as we're before
2402 // op legalization.
2403 // FIXME: Limit to scalars for now.
2404 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2405 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2406 Op.getOperand(0)));
2407
2408 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2409 break;
2410 }
2411 case ISD::SIGN_EXTEND_INREG: {
2412 SDValue Op0 = Op.getOperand(0);
2413 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2414 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2415
2416 // If we only care about the highest bit, don't bother shifting right.
2417 if (DemandedBits.isSignMask()) {
2418 unsigned MinSignedBits =
2419 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2420 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2421 // However if the input is already sign extended we expect the sign
2422 // extension to be dropped altogether later and do not simplify.
2423 if (!AlreadySignExtended) {
2424 // Compute the correct shift amount type, which must be getShiftAmountTy
2425 // for scalar types after legalization.
2426 SDValue ShiftAmt =
2427 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2428 return TLO.CombineTo(Op,
2429 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2430 }
2431 }
2432
2433 // If none of the extended bits are demanded, eliminate the sextinreg.
2434 if (DemandedBits.getActiveBits() <= ExVTBits)
2435 return TLO.CombineTo(Op, Op0);
2436
2437 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2438
2439 // Since the sign extended bits are demanded, we know that the sign
2440 // bit is demanded.
2441 InputDemandedBits.setBit(ExVTBits - 1);
2442
2443 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2444 Depth + 1))
2445 return true;
2446
2447 // If the sign bit of the input is known set or clear, then we know the
2448 // top bits of the result.
2449
2450 // If the input sign bit is known zero, convert this into a zero extension.
2451 if (Known.Zero[ExVTBits - 1])
2452 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2453
2454 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2455 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2456 Known.One.setBitsFrom(ExVTBits);
2457 Known.Zero &= Mask;
2458 } else { // Input sign bit unknown
2459 Known.Zero &= Mask;
2460 Known.One &= Mask;
2461 }
2462 break;
2463 }
2464 case ISD::BUILD_PAIR: {
2465 EVT HalfVT = Op.getOperand(0).getValueType();
2466 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2467
2468 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2469 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2470
2471 KnownBits KnownLo, KnownHi;
2472
2473 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2474 return true;
2475
2476 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2477 return true;
2478
2479 Known = KnownHi.concat(KnownLo);
2480 break;
2481 }
2482 case ISD::ZERO_EXTEND_VECTOR_INREG:
2483 if (VT.isScalableVector())
2484 return false;
2485 [[fallthrough]];
2486 case ISD::ZERO_EXTEND: {
2487 SDValue Src = Op.getOperand(0);
2488 EVT SrcVT = Src.getValueType();
2489 unsigned InBits = SrcVT.getScalarSizeInBits();
2490 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2491 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2492
2493 // If none of the top bits are demanded, convert this into an any_extend.
2494 if (DemandedBits.getActiveBits() <= InBits) {
2495 // If we only need the non-extended bits of the bottom element
2496 // then we can just bitcast to the result.
2497 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2498 VT.getSizeInBits() == SrcVT.getSizeInBits())
2499 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2500
2501 unsigned Opc =
2502 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2503 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2504 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2505 }
2506
2507 APInt InDemandedBits = DemandedBits.trunc(InBits);
2508 APInt InDemandedElts = DemandedElts.zext(InElts);
2509 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2510 Depth + 1)) {
2511 Op->dropFlags(SDNodeFlags::NonNeg);
2512 return true;
2513 }
2514 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2515 Known = Known.zext(BitWidth);
2516
2517 // Attempt to avoid multi-use ops if we don't need anything from them.
2518 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2519 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2520 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2521 break;
2522 }
2523 case ISD::SIGN_EXTEND_VECTOR_INREG:
2524 if (VT.isScalableVector())
2525 return false;
2526 [[fallthrough]];
2527 case ISD::SIGN_EXTEND: {
2528 SDValue Src = Op.getOperand(0);
2529 EVT SrcVT = Src.getValueType();
2530 unsigned InBits = SrcVT.getScalarSizeInBits();
2531 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2532 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2533
2534 APInt InDemandedElts = DemandedElts.zext(InElts);
2535 APInt InDemandedBits = DemandedBits.trunc(InBits);
2536
2537 // Since some of the sign extended bits are demanded, we know that the sign
2538 // bit is demanded.
2539 InDemandedBits.setBit(InBits - 1);
2540
2541 // If none of the top bits are demanded, convert this into an any_extend.
2542 if (DemandedBits.getActiveBits() <= InBits) {
2543 // If we only need the non-extended bits of the bottom element
2544 // then we can just bitcast to the result.
2545 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2546 VT.getSizeInBits() == SrcVT.getSizeInBits())
2547 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2548
2549 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2550 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2551 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2552 InBits) {
2553 unsigned Opc =
2554 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2555 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2556 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2557 }
2558 }
2559
2560 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2561 Depth + 1))
2562 return true;
2563 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2564
2565 // If the sign bit is known one, the top bits match.
2566 Known = Known.sext(BitWidth);
2567
2568 // If the sign bit is known zero, convert this to a zero extend.
2569 if (Known.isNonNegative()) {
2570 unsigned Opc =
2571 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2572 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2573 SDNodeFlags Flags;
2574 if (!IsVecInReg)
2575 Flags |= SDNodeFlags::NonNeg;
2576 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2577 }
2578 }
2579
2580 // Attempt to avoid multi-use ops if we don't need anything from them.
2581 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2582 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2583 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2584 break;
2585 }
2586 case ISD::ANY_EXTEND_VECTOR_INREG:
2587 if (VT.isScalableVector())
2588 return false;
2589 [[fallthrough]];
2590 case ISD::ANY_EXTEND: {
2591 SDValue Src = Op.getOperand(0);
2592 EVT SrcVT = Src.getValueType();
2593 unsigned InBits = SrcVT.getScalarSizeInBits();
2594 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2595 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2596
2597 // If we only need the bottom element then we can just bitcast.
2598 // TODO: Handle ANY_EXTEND?
2599 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2600 VT.getSizeInBits() == SrcVT.getSizeInBits())
2601 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2602
2603 APInt InDemandedBits = DemandedBits.trunc(InBits);
2604 APInt InDemandedElts = DemandedElts.zext(InElts);
2605 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2606 Depth + 1))
2607 return true;
2608 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2609 Known = Known.anyext(BitWidth);
2610
2611 // Attempt to avoid multi-use ops if we don't need anything from them.
2612 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2613 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2614 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2615 break;
2616 }
2617 case ISD::TRUNCATE: {
2618 SDValue Src = Op.getOperand(0);
2619
2620 // Simplify the input, using demanded bit information, and compute the known
2621 // zero/one bits live out.
2622 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2623 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2624 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2625 Depth + 1)) {
2626 // Disable the nsw and nuw flags. We can no longer guarantee that we
2627 // won't wrap after simplification.
2628 Op->dropFlags(SDNodeFlags::NoWrap);
2629 return true;
2630 }
2631 Known = Known.trunc(BitWidth);
2632
2633 // Attempt to avoid multi-use ops if we don't need anything from them.
2634 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2635 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2636 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2637
2638 // If the input is only used by this truncate, see if we can shrink it based
2639 // on the known demanded bits.
2640 switch (Src.getOpcode()) {
2641 default:
2642 break;
2643 case ISD::SRL:
2644 // Shrink SRL by a constant if none of the high bits shifted in are
2645 // demanded.
2646 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2647 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2648 // undesirable.
2649 break;
2650
2651 if (Src.getNode()->hasOneUse()) {
2652 if (isTruncateFree(Src, VT) &&
2653 !isTruncateFree(Src.getValueType(), VT)) {
2654 // If truncate is only free at trunc(srl), do not turn it into
2655 // srl(trunc). The check first verifies that the truncate is free
2656 // at Src's opcode (srl), then that the truncate is not done by
2657 // referencing a sub-register. In testing, if both trunc(srl) and
2658 // srl(trunc) have free truncates, srl(trunc) performs better; if
2659 // only trunc(srl)'s truncate is free, trunc(srl) is better.
2660 break;
2661 }
2662
2663 std::optional<unsigned> ShAmtC =
2664 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2665 if (!ShAmtC || *ShAmtC >= BitWidth)
2666 break;
2667 unsigned ShVal = *ShAmtC;
2668
2669 APInt HighBits =
2670 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2671 HighBits.lshrInPlace(ShVal);
2672 HighBits = HighBits.trunc(BitWidth);
2673 if (!(HighBits & DemandedBits)) {
2674 // None of the shifted in bits are needed. Add a truncate of the
2675 // shift input, then shift it.
2676 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2677 SDValue NewTrunc =
2678 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2679 return TLO.CombineTo(
2680 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2681 }
2682 }
2683 break;
2684 }
2685
2686 break;
2687 }
2688 case ISD::AssertZext: {
2689 // AssertZext demands all of the high bits, plus any of the low bits
2690 // demanded by its users.
2691 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2692 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2693 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2694 TLO, Depth + 1))
2695 return true;
2696
2697 Known.Zero |= ~InMask;
2698 Known.One &= (~Known.Zero);
2699 break;
2700 }
2701 case ISD::EXTRACT_VECTOR_ELT: {
2702 SDValue Src = Op.getOperand(0);
2703 SDValue Idx = Op.getOperand(1);
2704 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2705 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2706
2707 if (SrcEltCnt.isScalable())
2708 return false;
2709
2710 // Demand the bits from every vector element without a constant index.
2711 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2712 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2713 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2714 if (CIdx->getAPIntValue().ult(NumSrcElts))
2715 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2716
2717 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2718 // anything about the extended bits.
2719 APInt DemandedSrcBits = DemandedBits;
2720 if (BitWidth > EltBitWidth)
2721 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2722
2723 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2724 Depth + 1))
2725 return true;
2726
2727 // Attempt to avoid multi-use ops if we don't need anything from them.
2728 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2729 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2730 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2731 SDValue NewOp =
2732 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2733 return TLO.CombineTo(Op, NewOp);
2734 }
2735 }
2736
2737 Known = Known2;
2738 if (BitWidth > EltBitWidth)
2739 Known = Known.anyext(BitWidth);
2740 break;
2741 }
2742 case ISD::BITCAST: {
2743 if (VT.isScalableVector())
2744 return false;
2745 SDValue Src = Op.getOperand(0);
2746 EVT SrcVT = Src.getValueType();
2747 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2748
2749 // If this is an FP->Int bitcast and if the sign bit is the only
2750 // thing demanded, turn this into a FGETSIGN.
2751 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2752 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2753 SrcVT.isFloatingPoint()) {
2754 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2755 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2756 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2757 SrcVT != MVT::f128) {
2758 // Cannot eliminate/lower SHL for f128 yet.
2759 EVT Ty = OpVTLegal ? VT : MVT::i32;
2760 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2761 // place. We expect the SHL to be eliminated by other optimizations.
2762 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2763 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2764 if (!OpVTLegal && OpVTSizeInBits > 32)
2765 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2766 unsigned ShVal = Op.getValueSizeInBits() - 1;
2767 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2768 return TLO.CombineTo(Op,
2769 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2770 }
2771 }
2772
2773 // Bitcast from a vector using SimplifyDemandedBits / SimplifyDemandedVectorElts.
2774 // Demand the elt/bit if any of the original elts/bits are demanded.
2775 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2776 unsigned Scale = BitWidth / NumSrcEltBits;
2777 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2778 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2779 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2780 for (unsigned i = 0; i != Scale; ++i) {
2781 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2782 unsigned BitOffset = EltOffset * NumSrcEltBits;
2783 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2784 if (!Sub.isZero()) {
2785 DemandedSrcBits |= Sub;
2786 for (unsigned j = 0; j != NumElts; ++j)
2787 if (DemandedElts[j])
2788 DemandedSrcElts.setBit((j * Scale) + i);
2789 }
2790 }
2791
2792 APInt KnownSrcUndef, KnownSrcZero;
2793 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2794 KnownSrcZero, TLO, Depth + 1))
2795 return true;
2796
2797 KnownBits KnownSrcBits;
2798 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2799 KnownSrcBits, TLO, Depth + 1))
2800 return true;
2801 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2802 // TODO - bigendian once we have test coverage.
2803 unsigned Scale = NumSrcEltBits / BitWidth;
2804 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2805 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2806 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2807 for (unsigned i = 0; i != NumElts; ++i)
2808 if (DemandedElts[i]) {
2809 unsigned Offset = (i % Scale) * BitWidth;
2810 DemandedSrcBits.insertBits(DemandedBits, Offset);
2811 DemandedSrcElts.setBit(i / Scale);
2812 }
2813
2814 if (SrcVT.isVector()) {
2815 APInt KnownSrcUndef, KnownSrcZero;
2816 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2817 KnownSrcZero, TLO, Depth + 1))
2818 return true;
2819 }
2820
2821 KnownBits KnownSrcBits;
2822 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2823 KnownSrcBits, TLO, Depth + 1))
2824 return true;
2825
2826 // Attempt to avoid multi-use ops if we don't need anything from them.
2827 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2828 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2829 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2830 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2831 return TLO.CombineTo(Op, NewOp);
2832 }
2833 }
2834 }
2835
2836 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2837 // recursive call where Known may be useful to the caller.
2838 if (Depth > 0) {
2839 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2840 return false;
2841 }
2842 break;
2843 }
2844 case ISD::MUL:
2845 if (DemandedBits.isPowerOf2()) {
2846 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2847 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2848 // odd (has LSB set), then the left-shifted low bit of X is the answer.
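// e.g. demanding only bit 2 of X * 20 (0b10100): the constant's lowest
// set bit is also bit 2, so that result bit is bit 0 of X, i.e. X << 2.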
2849 unsigned CTZ = DemandedBits.countr_zero();
2850 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2851 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2852 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2853 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2854 return TLO.CombineTo(Op, Shl);
2855 }
2856 }
2857 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2858 // X * X is odd iff X is odd.
2859 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
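// e.g. X = 3: X * X = 9 = 0b1001, so bit 0 equals X[0] and bit 1 is 0.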
2860 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2861 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2862 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2863 return TLO.CombineTo(Op, And1);
2864 }
2865 [[fallthrough]];
2866 case ISD::PTRADD:
2867 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2868 break;
2869 // PTRADD behaves like ADD if pointers are represented as integers.
2870 [[fallthrough]];
2871 case ISD::ADD:
2872 case ISD::SUB: {
2873 // Add, Sub, and Mul don't demand any bits in positions beyond that
2874 // of the highest bit demanded of them.
2875 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2876 SDNodeFlags Flags = Op.getNode()->getFlags();
2877 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2878 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2879 KnownBits KnownOp0, KnownOp1;
2880 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2881 const KnownBits &KnownRHS) {
2882 if (Op.getOpcode() == ISD::MUL)
2883 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2884 return Demanded;
2885 };
2886 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2887 Depth + 1) ||
2888 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2889 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2890 // See if the operation should be performed at a smaller bit width.
2891 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2892 // Disable the nsw and nuw flags. We can no longer guarantee that we
2893 // won't wrap after simplification.
2894 Op->dropFlags(SDNodeFlags::NoWrap);
2895 return true;
2896 }
2897
2898 // neg x with only low bit demanded is simply x.
2899 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2900 isNullConstant(Op0))
2901 return TLO.CombineTo(Op, Op1);
2902
2903 // Attempt to avoid multi-use ops if we don't need anything from them.
2904 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2905 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2906 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2907 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2908 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2909 if (DemandedOp0 || DemandedOp1) {
2910 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2911 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2912 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2913 Flags & ~SDNodeFlags::NoWrap);
2914 return TLO.CombineTo(Op, NewOp);
2915 }
2916 }
2917
2918 // If we have a constant operand, we may be able to turn it into -1 if we
2919 // do not demand the high bits. This can make the constant smaller to
2920 // encode, allow more general folding, or match specialized instruction
2921 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2922 // is probably not useful (and could be detrimental).
2923 ConstantSDNode *C = isConstOrConstSplat(Op1);
2924 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
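// e.g. i8 (X + 0x7F) with only the low 7 bits demanded: HighMask = 0x80,
// 0x7F | 0x80 == 0xFF, so the add becomes X + (-1), i.e. X - 1.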
2925 if (C && !C->isAllOnes() && !C->isOne() &&
2926 (C->getAPIntValue() | HighMask).isAllOnes()) {
2927 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2928 // Disable the nsw and nuw flags. We can no longer guarantee that we
2929 // won't wrap after simplification.
2930 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2931 Flags & ~SDNodeFlags::NoWrap);
2932 return TLO.CombineTo(Op, NewOp);
2933 }
2934
2935 // Match a multiply with a disguised negated-power-of-2 and convert to
2936 // an equivalent shift-left amount.
2937 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
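// e.g. (X * -8) + Y --> Y - (X << 3), since -(-8) = 8 = 1 << 3.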
2938 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2939 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2940 return 0;
2941
2942 // Don't touch opaque constants. Also, ignore zero and power-of-2
2943 // multiplies. Those will get folded later.
2944 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2945 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2946 !MulC->getAPIntValue().isPowerOf2()) {
2947 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2948 if (UnmaskedC.isNegatedPowerOf2())
2949 return (-UnmaskedC).logBase2();
2950 }
2951 return 0;
2952 };
2953
2954 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2955 unsigned ShlAmt) {
2956 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2957 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2958 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2959 return TLO.CombineTo(Op, Res);
2960 };
2961
2962 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2963 if (Op.getOpcode() == ISD::ADD) {
2964 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2965 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2966 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2967 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2968 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2969 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2970 }
2971 if (Op.getOpcode() == ISD::SUB) {
2972 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2973 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2974 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2975 }
2976 }
2977
2978 if (Op.getOpcode() == ISD::MUL) {
2979 Known = KnownBits::mul(KnownOp0, KnownOp1);
2980 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2981 Known = KnownBits::computeForAddSub(
2982 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2983 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2984 }
2985 break;
2986 }
2987 case ISD::FABS: {
2988 SDValue Op0 = Op.getOperand(0);
2989 APInt SignMask = APInt::getSignMask(BitWidth);
2990
2991 if (!DemandedBits.intersects(SignMask))
2992 return TLO.CombineTo(Op, Op0);
2993
2994 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2995 Depth + 1))
2996 return true;
2997
2998 if (Known.isNonNegative())
2999 return TLO.CombineTo(Op, Op0);
3000 if (Known.isNegative())
3001 return TLO.CombineTo(
3002 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3003
3004 Known.Zero |= SignMask;
3005 Known.One &= ~SignMask;
3006
3007 break;
3008 }
3009 case ISD::FCOPYSIGN: {
3010 SDValue Op0 = Op.getOperand(0);
3011 SDValue Op1 = Op.getOperand(1);
3012
3013 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3014 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3015 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3016 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3017
3018 if (!DemandedBits.intersects(SignMask0))
3019 return TLO.CombineTo(Op, Op0);
3020
3021 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3022 Known, TLO, Depth + 1) ||
3023 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3024 Depth + 1))
3025 return true;
3026
3027 if (Known2.isNonNegative())
3028 return TLO.CombineTo(
3029 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3030
3031 if (Known2.isNegative())
3032 return TLO.CombineTo(
3033 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3034 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3035
3036 Known.Zero &= ~SignMask0;
3037 Known.One &= ~SignMask0;
3038 break;
3039 }
3040 case ISD::FNEG: {
3041 SDValue Op0 = Op.getOperand(0);
3042 APInt SignMask = APInt::getSignMask(BitWidth);
3043
3044 if (!DemandedBits.intersects(SignMask))
3045 return TLO.CombineTo(Op, Op0);
3046
3047 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3048 Depth + 1))
3049 return true;
3050
3051 if (!Known.isSignUnknown()) {
3052 Known.Zero ^= SignMask;
3053 Known.One ^= SignMask;
3054 }
3055
3056 break;
3057 }
3058 default:
3059 // We also ask the target about intrinsics (which could be specific to it).
3060 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3061 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3062 // TODO: Probably okay to remove after audit; here to reduce change size
3063 // in initial enablement patch for scalable vectors
3064 if (Op.getValueType().isScalableVector())
3065 break;
3066 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3067 Known, TLO, Depth))
3068 return true;
3069 break;
3070 }
3071
3072 // Just use computeKnownBits to compute output bits.
3073 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3074 break;
3075 }
3076
3077 // If we know the value of all of the demanded bits, return this as a
3078 // constant.
3079 if (!isTargetCanonicalConstantNode(Op) &&
3080 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3081 // Avoid folding to a constant if any OpaqueConstant is involved.
3082 if (llvm::any_of(Op->ops(), [](SDValue V) {
3083 auto *C = dyn_cast<ConstantSDNode>(V);
3084 return C && C->isOpaque();
3085 }))
3086 return false;
3087 if (VT.isInteger())
3088 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3089 if (VT.isFloatingPoint())
3090 return TLO.CombineTo(
3091 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3092 dl, VT));
3093 }
3094
3095 // A multi-use 'all demanded elts' simplify failed to find any knownbits.
3096 // Try again just for the original demanded elts.
3097 // Ensure we do this AFTER constant folding above.
3098 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3099 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3100
3101 return false;
3102}
3103
3104 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3105 const APInt &DemandedElts,
3106 DAGCombinerInfo &DCI) const {
3107 SelectionDAG &DAG = DCI.DAG;
3108 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3109 !DCI.isBeforeLegalizeOps());
3110
3111 APInt KnownUndef, KnownZero;
3112 bool Simplified =
3113 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3114 if (Simplified) {
3115 DCI.AddToWorklist(Op.getNode());
3116 DCI.CommitTargetLoweringOpt(TLO);
3117 }
3118
3119 return Simplified;
3120}
3121
3122/// Given a vector binary operation and known undefined elements for each input
3123 /// operand, compute whether each element of the output is undefined.
3124 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3125 const APInt &UndefOp0,
3126 const APInt &UndefOp1) {
3127 EVT VT = BO.getValueType();
3128 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) &&
3129 "Vector binop only");
3130
3131 EVT EltVT = VT.getVectorElementType();
3132 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3133 assert(UndefOp0.getBitWidth() == NumElts &&
3134 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3135
3136 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3137 const APInt &UndefVals) {
3138 if (UndefVals[Index])
3139 return DAG.getUNDEF(EltVT);
3140
3141 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3142 // Try hard to make sure that the getNode() call is not creating temporary
3143 // nodes. Ignore opaque integers because they do not constant fold.
3144 SDValue Elt = BV->getOperand(Index);
3145 auto *C = dyn_cast<ConstantSDNode>(Elt);
3146 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3147 return Elt;
3148 }
3149
3150 return SDValue();
3151 };
3152
3153 APInt KnownUndef = APInt::getZero(NumElts);
3154 for (unsigned i = 0; i != NumElts; ++i) {
3155 // If both inputs for this element are either constant or undef and match
3156 // the element type, compute the constant/undef result for this element of
3157 // the vector.
3158 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3159 // not handle FP constants. The code within getNode() should be refactored
3160 // to avoid the danger of creating a bogus temporary node here.
3161 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3162 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3163 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3164 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3165 KnownUndef.setBit(i);
3166 }
3167 return KnownUndef;
3168}
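// Worked example (illustrative values): for (add <2 x i32> %x, <i32 7, i32 42>)
// with UndefOp0 = 0b10 (element 1 of %x known undef) and UndefOp1 = 0b00,
// element 1 folds as add(undef, 42), which getNode() constant folds to undef,
// so bit 1 is set in the returned KnownUndef mask.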
3169
3170bool TargetLowering::SimplifyDemandedVectorElts(
3171 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3172 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3173 bool AssumeSingleUse) const {
3174 EVT VT = Op.getValueType();
3175 unsigned Opcode = Op.getOpcode();
3176 APInt DemandedElts = OriginalDemandedElts;
3177 unsigned NumElts = DemandedElts.getBitWidth();
3178 assert(VT.isVector() && "Expected vector op");
3179
3180 KnownUndef = KnownZero = APInt::getZero(NumElts);
3181
3182 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3183 return false;
3184
3185 // TODO: For now we assume we know nothing about scalable vectors.
3186 if (VT.isScalableVector())
3187 return false;
3188
3189 assert(VT.getVectorNumElements() == NumElts &&
3190 "Mask size mismatches value type element count!");
3191
3192 // Undef operand.
3193 if (Op.isUndef()) {
3194 KnownUndef.setAllBits();
3195 return false;
3196 }
3197
3198 // If Op has other users, assume that all elements are needed.
3199 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3200 DemandedElts.setAllBits();
3201
3202 // Not demanding any elements from Op.
3203 if (DemandedElts == 0) {
3204 KnownUndef.setAllBits();
3205 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3206 }
3207
3208 // Limit search depth.
3209 if (Depth >= SelectionDAG::MaxRecursionDepth)
3210 return false;
3211
3212 SDLoc DL(Op);
3213 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3214 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3215
3216 // Helper for demanding the specified elements and all the bits of both binary
3217 // operands.
3218 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3219 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3220 TLO.DAG, Depth + 1);
3221 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3222 TLO.DAG, Depth + 1);
3223 if (NewOp0 || NewOp1) {
3224 SDValue NewOp =
3225 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3226 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3227 return TLO.CombineTo(Op, NewOp);
3228 }
3229 return false;
3230 };
3231
3232 switch (Opcode) {
3233 case ISD::SCALAR_TO_VECTOR: {
3234 if (!DemandedElts[0]) {
3235 KnownUndef.setAllBits();
3236 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3237 }
3238 KnownUndef.setHighBits(NumElts - 1);
3239 break;
3240 }
3241 case ISD::BITCAST: {
3242 SDValue Src = Op.getOperand(0);
3243 EVT SrcVT = Src.getValueType();
3244
3245 if (!SrcVT.isVector()) {
3246 // TODO - bigendian once we have test coverage.
3247 if (IsLE) {
3248 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3249 unsigned EltSize = VT.getScalarSizeInBits();
3250 for (unsigned I = 0; I != NumElts; ++I) {
3251 if (DemandedElts[I]) {
3252 unsigned Offset = I * EltSize;
3253 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3254 }
3255 }
3256 KnownBits Known;
3257 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3258 return true;
3259 }
3260 break;
3261 }
3262
3263 // Fast handling of 'identity' bitcasts.
3264 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3265 if (NumSrcElts == NumElts)
3266 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3267 KnownZero, TLO, Depth + 1);
3268
3269 APInt SrcDemandedElts, SrcZero, SrcUndef;
3270
3271 // Bitcast from 'large element' src vector to 'small element' vector, we
3272 // must demand a source element if any DemandedElt maps to it.
3273 if ((NumElts % NumSrcElts) == 0) {
3274 unsigned Scale = NumElts / NumSrcElts;
3275 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3276 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3277 TLO, Depth + 1))
3278 return true;
3279
3280 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3281 // of the large element.
3282 // TODO - bigendian once we have test coverage.
3283 if (IsLE) {
3284 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3285 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3286 for (unsigned i = 0; i != NumElts; ++i)
3287 if (DemandedElts[i]) {
3288 unsigned Ofs = (i % Scale) * EltSizeInBits;
3289 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3290 }
3291
3292 KnownBits Known;
3293 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3294 TLO, Depth + 1))
3295 return true;
3296
3297 // The bitcast has split each wide element into a number of
3298 // narrow subelements. We have just computed the Known bits
3299 // for wide elements. See if element splitting results in
3300 // some subelements being zero. Only for demanded elements!
3301 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3302 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3303 .isAllOnes())
3304 continue;
3305 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3306 unsigned Elt = Scale * SrcElt + SubElt;
3307 if (DemandedElts[Elt])
3308 KnownZero.setBit(Elt);
3309 }
3310 }
3311 }
3312
3313 // If the src element is zero/undef then all the output elements covering it
3314 // will be too - only demanded elements are guaranteed to be correct.
3315 for (unsigned i = 0; i != NumSrcElts; ++i) {
3316 if (SrcDemandedElts[i]) {
3317 if (SrcZero[i])
3318 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3319 if (SrcUndef[i])
3320 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3321 }
3322 }
3323 }
3324
3325 // Bitcast from 'small element' src vector to 'large element' vector, we
3326 // demand all smaller source elements covered by the larger demanded element
3327 // of this vector.
3328 if ((NumSrcElts % NumElts) == 0) {
3329 unsigned Scale = NumSrcElts / NumElts;
3330 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3331 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3332 TLO, Depth + 1))
3333 return true;
3334
3335 // If all the src elements covering an output element are zero/undef, then
3336 // the output element will be as well, assuming it was demanded.
3337 for (unsigned i = 0; i != NumElts; ++i) {
3338 if (DemandedElts[i]) {
3339 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3340 KnownZero.setBit(i);
3341 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3342 KnownUndef.setBit(i);
3343 }
3344 }
3345 }
3346 break;
3347 }
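// Worked example (illustrative, little endian): bitcasting v2i64 -> v4i32
// gives Scale = 2, and output element 3 corresponds to the upper 32 bits of
// source element 1; if SimplifyDemandedBits proves those bits zero, element 3
// is added to KnownZero above.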
3348 case ISD::FREEZE: {
3349 SDValue N0 = Op.getOperand(0);
3350 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3351 /*PoisonOnly=*/false,
3352 Depth + 1))
3353 return TLO.CombineTo(Op, N0);
3354
3355 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3356 // freeze(op(x, ...)) -> op(freeze(x), ...).
3357 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3358 return TLO.CombineTo(
3359 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
3360 TLO.DAG.getFreeze(N0.getOperand(0))));
3361 break;
3362 }
3363 case ISD::BUILD_VECTOR: {
3364 // Check all elements and simplify any unused elements with UNDEF.
3365 if (!DemandedElts.isAllOnes()) {
3366 // Don't simplify BROADCASTS.
3367 if (llvm::any_of(Op->op_values(),
3368 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3369 SmallVector<SDValue, 32> Ops(Op->ops());
3370 bool Updated = false;
3371 for (unsigned i = 0; i != NumElts; ++i) {
3372 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3373 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3374 KnownUndef.setBit(i);
3375 Updated = true;
3376 }
3377 }
3378 if (Updated)
3379 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3380 }
3381 }
3382 for (unsigned i = 0; i != NumElts; ++i) {
3383 SDValue SrcOp = Op.getOperand(i);
3384 if (SrcOp.isUndef()) {
3385 KnownUndef.setBit(i);
3386 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3387 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3388 KnownZero.setBit(i);
3389 }
3390 }
3391 break;
3392 }
3393 case ISD::CONCAT_VECTORS: {
3394 EVT SubVT = Op.getOperand(0).getValueType();
3395 unsigned NumSubVecs = Op.getNumOperands();
3396 unsigned NumSubElts = SubVT.getVectorNumElements();
3397 for (unsigned i = 0; i != NumSubVecs; ++i) {
3398 SDValue SubOp = Op.getOperand(i);
3399 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3400 APInt SubUndef, SubZero;
3401 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3402 Depth + 1))
3403 return true;
3404 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3405 KnownZero.insertBits(SubZero, i * NumSubElts);
3406 }
3407
3408 // Attempt to avoid multi-use ops if we don't need anything from them.
3409 if (!DemandedElts.isAllOnes()) {
3410 bool FoundNewSub = false;
3411 SmallVector<SDValue, 2> DemandedSubOps;
3412 for (unsigned i = 0; i != NumSubVecs; ++i) {
3413 SDValue SubOp = Op.getOperand(i);
3414 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3415 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3416 SubOp, SubElts, TLO.DAG, Depth + 1);
3417 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3418 FoundNewSub = NewSubOp ? true : FoundNewSub;
3419 }
3420 if (FoundNewSub) {
3421 SDValue NewOp =
3422 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3423 return TLO.CombineTo(Op, NewOp);
3424 }
3425 }
3426 break;
3427 }
3428 case ISD::INSERT_SUBVECTOR: {
3429 // Demand any elements from the subvector and the remainder from the src it
3430 // is inserted into.
3431 SDValue Src = Op.getOperand(0);
3432 SDValue Sub = Op.getOperand(1);
3433 uint64_t Idx = Op.getConstantOperandVal(2);
3434 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3435 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3436 APInt DemandedSrcElts = DemandedElts;
3437 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3438
3439 // If none of the sub operand elements are demanded, bypass the insert.
3440 if (!DemandedSubElts)
3441 return TLO.CombineTo(Op, Src);
3442
3443 APInt SubUndef, SubZero;
3444 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3445 Depth + 1))
3446 return true;
3447
3448 // If none of the src operand elements are demanded, replace it with undef.
3449 if (!DemandedSrcElts && !Src.isUndef())
3450 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3451 TLO.DAG.getUNDEF(VT), Sub,
3452 Op.getOperand(2)));
3453
3454 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3455 TLO, Depth + 1))
3456 return true;
3457 KnownUndef.insertBits(SubUndef, Idx);
3458 KnownZero.insertBits(SubZero, Idx);
3459
3460 // Attempt to avoid multi-use ops if we don't need anything from them.
3461 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3462 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3463 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3464 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3465 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3466 if (NewSrc || NewSub) {
3467 NewSrc = NewSrc ? NewSrc : Src;
3468 NewSub = NewSub ? NewSub : Sub;
3469 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3470 NewSub, Op.getOperand(2));
3471 return TLO.CombineTo(Op, NewOp);
3472 }
3473 }
3474 break;
3475 }
3476 case ISD::EXTRACT_SUBVECTOR: {
3477 // Offset the demanded elts by the subvector index.
3478 SDValue Src = Op.getOperand(0);
3479 if (Src.getValueType().isScalableVector())
3480 break;
3481 uint64_t Idx = Op.getConstantOperandVal(1);
3482 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3483 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3484
3485 APInt SrcUndef, SrcZero;
3486 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3487 Depth + 1))
3488 return true;
3489 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3490 KnownZero = SrcZero.extractBits(NumElts, Idx);
3491
3492 // Attempt to avoid multi-use ops if we don't need anything from them.
3493 if (!DemandedElts.isAllOnes()) {
3494 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3495 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3496 if (NewSrc) {
3497 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3498 Op.getOperand(1));
3499 return TLO.CombineTo(Op, NewOp);
3500 }
3501 }
3502 break;
3503 }
3504 case ISD::INSERT_VECTOR_ELT: {
3505 SDValue Vec = Op.getOperand(0);
3506 SDValue Scl = Op.getOperand(1);
3507 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3508
3509 // For a legal, constant insertion index, if we don't need this insertion
3510 // then strip it, else remove it from the demanded elts.
3511 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3512 unsigned Idx = CIdx->getZExtValue();
3513 if (!DemandedElts[Idx])
3514 return TLO.CombineTo(Op, Vec);
3515
3516 APInt DemandedVecElts(DemandedElts);
3517 DemandedVecElts.clearBit(Idx);
3518 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3519 KnownZero, TLO, Depth + 1))
3520 return true;
3521
3522 KnownUndef.setBitVal(Idx, Scl.isUndef());
3523
3524 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3525 break;
3526 }
3527
3528 APInt VecUndef, VecZero;
3529 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3530 Depth + 1))
3531 return true;
3532 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3533 break;
3534 }
3535 case ISD::VSELECT: {
3536 SDValue Sel = Op.getOperand(0);
3537 SDValue LHS = Op.getOperand(1);
3538 SDValue RHS = Op.getOperand(2);
3539
3540 // Try to transform the select condition based on the current demanded
3541 // elements.
3542 APInt UndefSel, ZeroSel;
3543 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3544 Depth + 1))
3545 return true;
3546
3547 // See if we can simplify either vselect operand.
3548 APInt DemandedLHS(DemandedElts);
3549 APInt DemandedRHS(DemandedElts);
3550 APInt UndefLHS, ZeroLHS;
3551 APInt UndefRHS, ZeroRHS;
3552 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3553 Depth + 1))
3554 return true;
3555 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3556 Depth + 1))
3557 return true;
3558
3559 KnownUndef = UndefLHS & UndefRHS;
3560 KnownZero = ZeroLHS & ZeroRHS;
3561
3562 // If we know that the selected element is always zero, we don't need the
3563 // select value element.
3564 APInt DemandedSel = DemandedElts & ~KnownZero;
3565 if (DemandedSel != DemandedElts)
3566 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3567 Depth + 1))
3568 return true;
3569
3570 break;
3571 }
3572 case ISD::VECTOR_SHUFFLE: {
3573 SDValue LHS = Op.getOperand(0);
3574 SDValue RHS = Op.getOperand(1);
3575 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3576
3577 // Collect demanded elements from shuffle operands.
3578 APInt DemandedLHS(NumElts, 0);
3579 APInt DemandedRHS(NumElts, 0);
3580 for (unsigned i = 0; i != NumElts; ++i) {
3581 int M = ShuffleMask[i];
3582 if (M < 0 || !DemandedElts[i])
3583 continue;
3584 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3585 if (M < (int)NumElts)
3586 DemandedLHS.setBit(M);
3587 else
3588 DemandedRHS.setBit(M - NumElts);
3589 }
3590
3591 // If either side isn't demanded, replace it by UNDEF. We handle this
3592 // explicitly here to also simplify in case of multiple uses (in contrast
3593 // to the SimplifyDemandedVectorElts calls below).
3594 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3595 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3596 if (FoldLHS || FoldRHS) {
3597 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3598 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3599 SDValue NewOp =
3600 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3601 return TLO.CombineTo(Op, NewOp);
3602 }
3603
3604 // See if we can simplify either shuffle operand.
3605 APInt UndefLHS, ZeroLHS;
3606 APInt UndefRHS, ZeroRHS;
3607 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3608 Depth + 1))
3609 return true;
3610 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3611 Depth + 1))
3612 return true;
3613
3614 // Simplify mask using undef elements from LHS/RHS.
3615 bool Updated = false;
3616 bool IdentityLHS = true, IdentityRHS = true;
3617 SmallVector<int, 32> NewMask(ShuffleMask);
3618 for (unsigned i = 0; i != NumElts; ++i) {
3619 int &M = NewMask[i];
3620 if (M < 0)
3621 continue;
3622 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3623 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3624 Updated = true;
3625 M = -1;
3626 }
3627 IdentityLHS &= (M < 0) || (M == (int)i);
3628 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3629 }
3630
3631 // Update legal shuffle masks based on demanded elements if it won't reduce
3632 // to Identity which can cause premature removal of the shuffle mask.
3633 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3634 SDValue LegalShuffle =
3635 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3636 if (LegalShuffle)
3637 return TLO.CombineTo(Op, LegalShuffle);
3638 }
3639
3640 // Propagate undef/zero elements from LHS/RHS.
3641 for (unsigned i = 0; i != NumElts; ++i) {
3642 int M = ShuffleMask[i];
3643 if (M < 0) {
3644 KnownUndef.setBit(i);
3645 } else if (M < (int)NumElts) {
3646 if (UndefLHS[M])
3647 KnownUndef.setBit(i);
3648 if (ZeroLHS[M])
3649 KnownZero.setBit(i);
3650 } else {
3651 if (UndefRHS[M - NumElts])
3652 KnownUndef.setBit(i);
3653 if (ZeroRHS[M - NumElts])
3654 KnownZero.setBit(i);
3655 }
3656 }
3657 break;
3658 }
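// Worked example (illustrative): for (vector_shuffle <4 x i32> %a, %b,
// <0, 1, 4, 5>) with DemandedElts = 0b0011, no demanded lane reads %b, so %b
// is replaced by UNDEF up front; the undemanded mask entries 2 and 3 can then
// be relaxed to -1 on a later visit.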
3659 case ISD::ANY_EXTEND_VECTOR_INREG:
3660 case ISD::SIGN_EXTEND_VECTOR_INREG:
3661 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3662 APInt SrcUndef, SrcZero;
3663 SDValue Src = Op.getOperand(0);
3664 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3665 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3666 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3667 Depth + 1))
3668 return true;
3669 KnownZero = SrcZero.zextOrTrunc(NumElts);
3670 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3671
3672 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3673 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3674 DemandedSrcElts == 1) {
3675 // aext - if we just need the bottom element then we can bitcast.
3676 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3677 }
3678
3679 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3680 // zext(undef) upper bits are guaranteed to be zero.
3681 if (DemandedElts.isSubsetOf(KnownUndef))
3682 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3683 KnownUndef.clearAllBits();
3684
3685 // zext - if we just need the bottom element then we can mask:
3686 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3687 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3688 Op->isOnlyUserOf(Src.getNode()) &&
3689 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3690 SDLoc DL(Op);
3691 EVT SrcVT = Src.getValueType();
3692 EVT SrcSVT = SrcVT.getScalarType();
3693 SmallVector<SDValue> MaskElts;
3694 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3695 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3696 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3697 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3698 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3699 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3700 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3701 }
3702 }
3703 }
3704 break;
3705 }
3706
3707 // TODO: There are more binop opcodes that could be handled here - MIN,
3708 // MAX, saturated math, etc.
3709 case ISD::ADD: {
3710 SDValue Op0 = Op.getOperand(0);
3711 SDValue Op1 = Op.getOperand(1);
3712 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3713 APInt UndefLHS, ZeroLHS;
3714 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3715 Depth + 1, /*AssumeSingleUse*/ true))
3716 return true;
3717 }
3718 [[fallthrough]];
3719 }
3720 case ISD::AVGCEILS:
3721 case ISD::AVGCEILU:
3722 case ISD::AVGFLOORS:
3723 case ISD::AVGFLOORU:
3724 case ISD::OR:
3725 case ISD::XOR:
3726 case ISD::SUB:
3727 case ISD::FADD:
3728 case ISD::FSUB:
3729 case ISD::FMUL:
3730 case ISD::FDIV:
3731 case ISD::FREM: {
3732 SDValue Op0 = Op.getOperand(0);
3733 SDValue Op1 = Op.getOperand(1);
3734
3735 APInt UndefRHS, ZeroRHS;
3736 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3737 Depth + 1))
3738 return true;
3739 APInt UndefLHS, ZeroLHS;
3740 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3741 Depth + 1))
3742 return true;
3743
3744 KnownZero = ZeroLHS & ZeroRHS;
3745 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3746
3747 // Attempt to avoid multi-use ops if we don't need anything from them.
3748 // TODO - use KnownUndef to relax the demandedelts?
3749 if (!DemandedElts.isAllOnes())
3750 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3751 return true;
3752 break;
3753 }
3754 case ISD::SHL:
3755 case ISD::SRL:
3756 case ISD::SRA:
3757 case ISD::ROTL:
3758 case ISD::ROTR: {
3759 SDValue Op0 = Op.getOperand(0);
3760 SDValue Op1 = Op.getOperand(1);
3761
3762 APInt UndefRHS, ZeroRHS;
3763 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3764 Depth + 1))
3765 return true;
3766 APInt UndefLHS, ZeroLHS;
3767 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3768 Depth + 1))
3769 return true;
3770
3771 KnownZero = ZeroLHS;
3772 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3773
3774 // Attempt to avoid multi-use ops if we don't need anything from them.
3775 // TODO - use KnownUndef to relax the demandedelts?
3776 if (!DemandedElts.isAllOnes())
3777 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3778 return true;
3779 break;
3780 }
3781 case ISD::MUL:
3782 case ISD::MULHU:
3783 case ISD::MULHS:
3784 case ISD::AND: {
3785 SDValue Op0 = Op.getOperand(0);
3786 SDValue Op1 = Op.getOperand(1);
3787
3788 APInt SrcUndef, SrcZero;
3789 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3790 Depth + 1))
3791 return true;
3792 // If we know that a demanded element was zero in Op1 we don't need to
3793 // demand it in Op0 - it's guaranteed to be zero.
3794 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3795 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3796 TLO, Depth + 1))
3797 return true;
3798
3799 KnownUndef &= DemandedElts0;
3800 KnownZero &= DemandedElts0;
3801
3802 // If every element pair has a zero/undef then just fold to zero.
3803 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3804 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3805 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3806 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3807
3808 // If either side has a zero element, then the result element is zero, even
3809 // if the other is an UNDEF.
3810 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3811 // and then handle 'and' nodes with the rest of the binop opcodes.
3812 KnownZero |= SrcZero;
3813 KnownUndef &= SrcUndef;
3814 KnownUndef &= ~KnownZero;
3815
3816 // Attempt to avoid multi-use ops if we don't need anything from them.
3817 if (!DemandedElts.isAllOnes())
3818 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3819 return true;
3820 break;
3821 }
3822 case ISD::TRUNCATE:
3823 case ISD::SIGN_EXTEND:
3824 case ISD::ZERO_EXTEND:
3825 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3826 KnownZero, TLO, Depth + 1))
3827 return true;
3828
3829 if (!DemandedElts.isAllOnes())
3830 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3831 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3832 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3833
3834 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3835 // zext(undef) upper bits are guaranteed to be zero.
3836 if (DemandedElts.isSubsetOf(KnownUndef))
3837 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3838 KnownUndef.clearAllBits();
3839 }
3840 break;
3841 case ISD::SINT_TO_FP:
3842 case ISD::UINT_TO_FP:
3843 case ISD::FP_TO_SINT:
3844 case ISD::FP_TO_UINT:
3845 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3846 KnownZero, TLO, Depth + 1))
3847 return true;
3848 // Don't fall through to generic undef -> undef handling.
3849 return false;
3850 default: {
3851 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3852 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3853 KnownZero, TLO, Depth))
3854 return true;
3855 } else {
3856 KnownBits Known;
3857 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3858 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3859 TLO, Depth, AssumeSingleUse))
3860 return true;
3861 }
3862 break;
3863 }
3864 }
3865 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3866
3867 // Constant fold all undef cases.
3868 // TODO: Handle zero cases as well.
3869 if (DemandedElts.isSubsetOf(KnownUndef))
3870 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3871
3872 return false;
3873}
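// Usage sketch (hypothetical caller): a combine that only cares about lane 0
// of a v4i32 value would pass a one-bit demanded mask through the
// DAGCombinerInfo wrapper above, e.g.
//   TLI.SimplifyDemandedVectorElts(Op, APInt(4, 1), DCI);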
3874
3875/// Determine which of the bits specified in Mask are known to be either zero or
3876/// one and return them in the Known.
3877void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3878 KnownBits &Known,
3879 const APInt &DemandedElts,
3880 const SelectionDAG &DAG,
3881 unsigned Depth) const {
3882 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3883 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3884 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3885 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3886 "Should use MaskedValueIsZero if you don't know whether Op"
3887 " is a target node!");
3888 Known.resetAll();
3889}
3890
3891void TargetLowering::computeKnownBitsForTargetInstr(
3892 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3893 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3894 unsigned Depth) const {
3895 Known.resetAll();
3896}
3897
3898void TargetLowering::computeKnownFPClassForTargetInstr(
3899 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3900 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3901 unsigned Depth) const {
3902 Known.resetAll();
3903}
3904
3905void TargetLowering::computeKnownBitsForFrameIndex(
3906 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3907 // The low bits are known zero if the pointer is aligned.
3908 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3909}
3910
3911Align TargetLowering::computeKnownAlignForTargetInstr(
3912 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3913 unsigned Depth) const {
3914 return Align(1);
3915}
3916
3917/// This method can be implemented by targets that want to expose additional
3918/// information about sign bits to the DAG Combiner.
3919unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3920 const APInt &,
3921 const SelectionDAG &,
3922 unsigned Depth) const {
3923 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3924 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3925 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3926 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3927 "Should use ComputeNumSignBits if you don't know whether Op"
3928 " is a target node!");
3929 return 1;
3930}
3931
3932unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3933 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3934 const MachineRegisterInfo &MRI, unsigned Depth) const {
3935 return 1;
3936}
3937
3938bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3939 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3940 TargetLoweringOpt &TLO, unsigned Depth) const {
3941 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3942 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3943 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3944 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3945 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3946 " is a target node!");
3947 return false;
3948}
3949
3950bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3951 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3952 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3953 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3954 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3955 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3956 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3957 "Should use SimplifyDemandedBits if you don't know whether Op"
3958 " is a target node!");
3959 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3960 return false;
3961}
3962
3963SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3964 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3965 SelectionDAG &DAG, unsigned Depth) const {
3966 assert(
3967 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3968 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3969 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3970 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3971 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3972 " is a target node!");
3973 return SDValue();
3974}
3975
3976SDValue
3977TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3978 SDValue N1, MutableArrayRef<int> Mask,
3979 SelectionDAG &DAG) const {
3980 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3981 if (!LegalMask) {
3982 std::swap(N0, N1);
3983 ShuffleVectorSDNode::commuteMask(Mask);
3984 LegalMask = isShuffleMaskLegal(Mask, VT);
3985 }
3986
3987 if (!LegalMask)
3988 return SDValue();
3989
3990 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3991}
3992
3993const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
3994 return nullptr;
3995}
3996
3997bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3998 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3999 bool PoisonOnly, unsigned Depth) const {
4000 assert(
4001 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4002 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4003 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4005 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4006 " is a target node!");
4007
4008 // If Op can't create undef/poison and none of its operands are undef/poison
4009 // then Op is never undef/poison.
4010 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4011 /*ConsiderFlags*/ true, Depth) &&
4012 all_of(Op->ops(), [&](SDValue V) {
4013 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4014 Depth + 1);
4015 });
4016}
4017
4018bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4019 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4020 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4021 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4022 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4023 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4024 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4025 "Should use canCreateUndefOrPoison if you don't know whether Op"
4026 " is a target node!");
4027 // Be conservative and return true.
4028 return true;
4029}
4030
4031bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4032 const APInt &DemandedElts,
4033 const SelectionDAG &DAG,
4034 bool SNaN,
4035 unsigned Depth) const {
4036 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4037 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4038 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4039 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4040 "Should use isKnownNeverNaN if you don't know whether Op"
4041 " is a target node!");
4042 return false;
4043}
4044
4045bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4046 const APInt &DemandedElts,
4047 APInt &UndefElts,
4048 const SelectionDAG &DAG,
4049 unsigned Depth) const {
4050 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4051 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4052 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4053 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4054 "Should use isSplatValue if you don't know whether Op"
4055 " is a target node!");
4056 return false;
4057}
4058
4059// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4060// work with truncating build vectors and vectors with elements of less than
4061// 8 bits.
4062bool TargetLowering::isConstTrueVal(SDValue N) const {
4063 if (!N)
4064 return false;
4065
4066 unsigned EltWidth;
4067 APInt CVal;
4068 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4069 /*AllowTruncation=*/true)) {
4070 CVal = CN->getAPIntValue();
4071 EltWidth = N.getValueType().getScalarSizeInBits();
4072 } else
4073 return false;
4074
4075 // If this is a truncating splat, truncate the splat value.
4076 // Otherwise, we may fail to match the expected values below.
4077 if (EltWidth < CVal.getBitWidth())
4078 CVal = CVal.trunc(EltWidth);
4079
4080 switch (getBooleanContents(N.getValueType())) {
4081 case UndefinedBooleanContent:
4082 return CVal[0];
4083 case ZeroOrOneBooleanContent:
4084 return CVal.isOne();
4085 case ZeroOrNegativeOneBooleanContent:
4086 return CVal.isAllOnes();
4087 }
4088
4089 llvm_unreachable("Invalid boolean contents");
4090}
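// Example: under ZeroOrNegativeOneBooleanContent a splat of +1 is not a
// constant "true" - only an all-ones (-1) splat is - while under
// UndefinedBooleanContent only bit 0 of the splat value is inspected.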
4091
4092bool TargetLowering::isConstFalseVal(SDValue N) const {
4093 if (!N)
4094 return false;
4095
4096 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4097 if (!CN) {
4098 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4099 if (!BV)
4100 return false;
4101
4102 // Only interested in constant splats; we don't care about undef
4103 // elements in identifying boolean constants, and getConstantSplatNode
4104 // returns NULL if all ops are undef.
4105 CN = BV->getConstantSplatNode();
4106 if (!CN)
4107 return false;
4108 }
4109
4110 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4111 return !CN->getAPIntValue()[0];
4112
4113 return CN->isZero();
4114}
4115
4116bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4117 bool SExt) const {
4118 if (VT == MVT::i1)
4119 return N->isOne();
4120
4122 switch (Cnt) {
4123 case TargetLowering::ZeroOrOneBooleanContent:
4124 // An extended value of 1 is always true, unless its original type is i1,
4125 // in which case it will be sign extended to -1.
4126 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4127 case TargetLowering::UndefinedBooleanContent:
4128 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4129 return N->isAllOnes() && SExt;
4130 }
4131 llvm_unreachable("Unexpected enumeration.");
4132}
4133
4134/// This helper function of SimplifySetCC tries to optimize the comparison when
4135/// either operand of the SetCC node is a bitwise-and instruction.
4136SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4137 ISD::CondCode Cond, const SDLoc &DL,
4138 DAGCombinerInfo &DCI) const {
4139 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4140 std::swap(N0, N1);
4141
4142 SelectionDAG &DAG = DCI.DAG;
4143 EVT OpVT = N0.getValueType();
4144 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4145 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4146 return SDValue();
4147
4148 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4149 // iff everything but LSB is known zero:
4150 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4151 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
4152 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
4153 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4154 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4155 if (DAG.MaskedValueIsZero(N0, UpperBits))
4156 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4157 }
4158
4159 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4160 // test in a narrow type that we can truncate to with no cost. Examples:
4161 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4162 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4163 // TODO: This conservatively checks for type legality on the source and
4164 // destination types. That may inhibit optimizations, but it also
4165 // allows setcc->shift transforms that may be more beneficial.
4166 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4167 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4168 isTypeLegal(OpVT) && N0.hasOneUse()) {
4169 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4170 AndC->getAPIntValue().getActiveBits());
4171 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4172 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4173 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4174 return DAG.getSetCC(DL, VT, Trunc, Zero,
4175 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4176 }
4177 }
4178
4179 // Match these patterns in any of their permutations:
4180 // (X & Y) == Y
4181 // (X & Y) != Y
4182 SDValue X, Y;
4183 if (N0.getOperand(0) == N1) {
4184 X = N0.getOperand(1);
4185 Y = N0.getOperand(0);
4186 } else if (N0.getOperand(1) == N1) {
4187 X = N0.getOperand(0);
4188 Y = N0.getOperand(1);
4189 } else {
4190 return SDValue();
4191 }
4192
4193 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4194 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4195 // it's liable to create an infinite loop.
4196 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4197 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4198 DAG.isKnownToBeAPowerOfTwo(Y)) {
4199 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4200 // Note that where Y is variable and is known to have at most one bit set
4201 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4202 // equivalent when Y == 0.
4203 assert(OpVT.isInteger());
4204 Cond = ISD::getSetCCInverse(Cond, OpVT);
4205 if (DCI.isBeforeLegalizeOps() ||
4206 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4207 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4208 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4209 // If the target supports an 'and-not' or 'and-complement' logic operation,
4210 // try to use that to make a comparison operation more efficient.
4211 // But don't do this transform if the mask is a single bit because there are
4212 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4213 // 'rlwinm' on PPC).
4214
4215 // Bail out if the compare operand that we want to turn into a zero is
4216 // already a zero (otherwise, infinite loop).
4217 if (isNullConstant(Y))
4218 return SDValue();
4219
4220 // Transform this into: ~X & Y == 0.
4221 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4222 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4223 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4224 }
4225
4226 return SDValue();
4227}
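// Worked example (illustrative): on a target whose hasAndNotCompare() is true
// (one with an and-not compare such as AArch64's bics),
// (setne (and %x, %y), %y) with a multi-bit %y becomes
// (setne (and (xor %x, -1), %y), 0), i.e. a single and-not plus a compare
// against zero.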
4228
4229/// This helper function of SimplifySetCC tries to optimize the comparison when
4230/// either operand of the SetCC node is a bitwise-or instruction.
4231/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4232SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4233 ISD::CondCode Cond, const SDLoc &DL,
4234 DAGCombinerInfo &DCI) const {
4235 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4236 std::swap(N0, N1);
4237
4238 SelectionDAG &DAG = DCI.DAG;
4239 EVT OpVT = N0.getValueType();
4240 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4241 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4242 return SDValue();
4243
4244 // (X | Y) == Y
4245 // (X | Y) != Y
4246 SDValue X;
4247 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4248 // If the target supports an 'and-not' or 'and-complement' logic operation,
4249 // try to use that to make a comparison operation more efficient.
4250
4251 // Bail out if the compare operand that we want to turn into a zero is
4252 // already a zero (otherwise, infinite loop).
4253 if (isNullConstant(N1))
4254 return SDValue();
4255
4256 // Transform this into: X & ~Y ==/!= 0.
4257 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4258 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4259 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4260 }
4261
4262 return SDValue();
4263}
4264
4265/// There are multiple IR patterns that could be checking whether certain
4266/// truncation of a signed number would be lossy or not. The pattern that is
4267/// best at the IR level may not lower optimally. Thus, we want to unfold it.
4268/// We are looking for the following pattern: (KeptBits is a constant)
4269/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4270/// KeptBits won't be bitwidth(x), since that would be constant-folded to true/false.
4271/// KeptBits also can't be 1, since that would have been folded to %x dstcond 0.
4272/// We will unfold it into the natural trunc+sext pattern:
4273/// ((%x << C) a>> C) dstcond %x
4274/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4275SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4276 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4277 const SDLoc &DL) const {
4278 // We must be comparing with a constant.
4279 ConstantSDNode *C1;
4280 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4281 return SDValue();
4282
4283 // N0 should be: add %x, (1 << (KeptBits-1))
4284 if (N0->getOpcode() != ISD::ADD)
4285 return SDValue();
4286
4287 // And we must be 'add'ing a constant.
4288 ConstantSDNode *C01;
4289 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4290 return SDValue();
4291
4292 SDValue X = N0->getOperand(0);
4293 EVT XVT = X.getValueType();
4294
4295 // Validate constants ...
4296
4297 APInt I1 = C1->getAPIntValue();
4298
4299 ISD::CondCode NewCond;
4300 if (Cond == ISD::CondCode::SETULT) {
4301 NewCond = ISD::CondCode::SETEQ;
4302 } else if (Cond == ISD::CondCode::SETULE) {
4303 NewCond = ISD::CondCode::SETEQ;
4304 // But need to 'canonicalize' the constant.
4305 I1 += 1;
4306 } else if (Cond == ISD::CondCode::SETUGT) {
4307 NewCond = ISD::CondCode::SETNE;
4308 // But need to 'canonicalize' the constant.
4309 I1 += 1;
4310 } else if (Cond == ISD::CondCode::SETUGE) {
4311 NewCond = ISD::CondCode::SETNE;
4312 } else
4313 return SDValue();
4314
4315 APInt I01 = C01->getAPIntValue();
4316
4317 auto checkConstants = [&I1, &I01]() -> bool {
4318 // Both of them must be power-of-two, and the constant from setcc is bigger.
4319 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4320 };
4321
4322 if (checkConstants()) {
4323 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4324 } else {
4325 // What if we invert constants? (and the target predicate)
4326 I1.negate();
4327 I01.negate();
4328 assert(XVT.isInteger());
4329 NewCond = getSetCCInverse(NewCond, XVT);
4330 if (!checkConstants())
4331 return SDValue();
4332 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4333 }
4334
4335 // They are power-of-two, so which bit is set?
4336 const unsigned KeptBits = I1.logBase2();
4337 const unsigned KeptBitsMinusOne = I01.logBase2();
4338
4339 // Magic!
4340 if (KeptBits != (KeptBitsMinusOne + 1))
4341 return SDValue();
4342 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4343
4344 // We don't want to do this in every single case.
4345 SelectionDAG &DAG = DCI.DAG;
4346 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4347 return SDValue();
4348
4349 // Unfold into: sext_inreg(%x) cond %x
4350 // Where 'cond' will be either 'eq' or 'ne'.
4351 SDValue SExtInReg = DAG.getNode(
4352 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4353 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4354 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4355}
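// Worked example (illustrative): (setult (add i16 %x, 128), 256) asks whether
// %x fits in i8, so KeptBits = 8 and the unfolded form is
// (seteq (sign_extend_inreg %x, i8), %x).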
4356
4357// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4358SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4359 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4360 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4362 "Should be a comparison with 0.");
4363 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4364 "Valid only for [in]equality comparisons.");
4365
4366 unsigned NewShiftOpcode;
4367 SDValue X, C, Y;
4368
4369 SelectionDAG &DAG = DCI.DAG;
4370
4371 // Look for '(C l>>/<< Y)'.
4372 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4373 // The shift should be one-use.
4374 if (!V.hasOneUse())
4375 return false;
4376 unsigned OldShiftOpcode = V.getOpcode();
4377 switch (OldShiftOpcode) {
4378 case ISD::SHL:
4379 NewShiftOpcode = ISD::SRL;
4380 break;
4381 case ISD::SRL:
4382 NewShiftOpcode = ISD::SHL;
4383 break;
4384 default:
4385 return false; // must be a logical shift.
4386 }
4387 // We should be shifting a constant.
4388 // FIXME: best to use isConstantOrConstantVector().
4389 C = V.getOperand(0);
4390 ConstantSDNode *CC =
4391 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4392 if (!CC)
4393 return false;
4394 Y = V.getOperand(1);
4395
4396 ConstantSDNode *XC =
4397 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4398 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4399 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4400 };
4401
4402 // LHS of comparison should be a one-use 'and'.
4403 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4404 return SDValue();
4405
4406 X = N0.getOperand(0);
4407 SDValue Mask = N0.getOperand(1);
4408
4409 // 'and' is commutative!
4410 if (!Match(Mask)) {
4411 std::swap(X, Mask);
4412 if (!Match(Mask))
4413 return SDValue();
4414 }
4415
4416 EVT VT = X.getValueType();
4417
4418 // Produce:
4419 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4420 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4421 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4422 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4423 return T2;
4424}
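// Worked example (illustrative): (setne (and %x, (shl 1, %y)), 0) becomes
// (setne (and (srl %x, %y), 1), 0); hoisting the constant out of the shift
// turns a variable-shifted mask into a cheap fixed single-bit test.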
4425
4426/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4427/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4428/// handle the commuted versions of these patterns.
4429SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4430 ISD::CondCode Cond, const SDLoc &DL,
4431 DAGCombinerInfo &DCI) const {
4432 unsigned BOpcode = N0.getOpcode();
4433 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4434 "Unexpected binop");
4435 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4436
4437 // (X + Y) == X --> Y == 0
4438 // (X - Y) == X --> Y == 0
4439 // (X ^ Y) == X --> Y == 0
4440 SelectionDAG &DAG = DCI.DAG;
4441 EVT OpVT = N0.getValueType();
4442 SDValue X = N0.getOperand(0);
4443 SDValue Y = N0.getOperand(1);
4444 if (X == N1)
4445 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4446
4447 if (Y != N1)
4448 return SDValue();
4449
4450 // (X + Y) == Y --> X == 0
4451 // (X ^ Y) == Y --> X == 0
4452 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4453 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4454
4455 // The shift would not be valid if the operands are boolean (i1).
4456 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4457 return SDValue();
4458
4459 // (X - Y) == Y --> X == Y << 1
4460 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4461 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4462 if (!DCI.isCalledByLegalizer())
4463 DCI.AddToWorklist(YShl1.getNode());
4464 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4465}
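// Worked example (illustrative): (seteq (sub %x, %y), %y) becomes
// (seteq %x, (shl %y, 1)), since X - Y == Y iff X == 2 * Y; the i1 case is
// rejected above because the shift amount would equal the bit width.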
4466
4467static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4468 SDValue N0, const APInt &C1,
4469 ISD::CondCode Cond, const SDLoc &dl,
4470 SelectionDAG &DAG) {
4471 // Look through truncs that don't change the value of a ctpop.
4472 // FIXME: Add vector support? Need to be careful with setcc result type below.
4473 SDValue CTPOP = N0;
4474 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4475 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4476 CTPOP = N0.getOperand(0);
4477
4478 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4479 return SDValue();
4480
4481 EVT CTVT = CTPOP.getValueType();
4482 SDValue CTOp = CTPOP.getOperand(0);
4483
4484 // Expand a power-of-2-or-zero comparison based on ctpop:
4485 // (ctpop x) u< 2 -> (x & x-1) == 0
4486 // (ctpop x) u> 1 -> (x & x-1) != 0
4487 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4488 // Keep the CTPOP if it is a cheap vector op.
4489 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4490 return SDValue();
4491
4492 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4493 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4494 return SDValue();
4495 if (C1 == 0 && (Cond == ISD::SETULT))
4496 return SDValue(); // This is handled elsewhere.
4497
4498 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4499
4500 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4501 SDValue Result = CTOp;
4502 for (unsigned i = 0; i < Passes; i++) {
4503 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4504 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4505 }
4506 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4507 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4508 }
4509
4510 // Expand a power-of-2 comparison based on ctpop
4511 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4512 // Keep the CTPOP if it is cheap.
4513 if (TLI.isCtpopFast(CTVT))
4514 return SDValue();
4515
4516 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4517 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4518 assert(CTVT.isInteger());
4519 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4520
4521 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4522 // check before emitting a potentially unnecessary op.
4523 if (DAG.isKnownNeverZero(CTOp)) {
4524 // (ctpop x) == 1 --> (x & x-1) == 0
4525 // (ctpop x) != 1 --> (x & x-1) != 0
4526 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4527 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4528 return RHS;
4529 }
4530
4531 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4532 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4533 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4534 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4535 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4536 }
4537
4538 return SDValue();
4539}
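// Worked example (illustrative): for %x = 0b0110, %x - 1 = 0b0101 and
// (%x & (%x - 1)) = 0b0100, which is nonzero, so (setugt (ctpop %x), 1)
// folds to true, matching ctpop(%x) == 2.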
4540
4541static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4542 ISD::CondCode Cond, const SDLoc &dl,
4543 SelectionDAG &DAG) {
4544 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4545 return SDValue();
4546
4547 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4548 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4549 return SDValue();
4550
4551 auto getRotateSource = [](SDValue X) {
4552 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4553 return X.getOperand(0);
4554 return SDValue();
4555 };
4556
4557 // Peek through a rotated value compared against 0 or -1:
4558 // (rot X, Y) == 0/-1 --> X == 0/-1
4559 // (rot X, Y) != 0/-1 --> X != 0/-1
4560 if (SDValue R = getRotateSource(N0))
4561 return DAG.getSetCC(dl, VT, R, N1, Cond);
4562
4563 // Peek through an 'or' of a rotated value compared against 0:
4564 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4565 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4566 //
4567 // TODO: Add the 'and' with -1 sibling.
4568 // TODO: Recurse through a series of 'or' ops to find the rotate.
4569 EVT OpVT = N0.getValueType();
4570 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4571 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4572 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4573 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4574 }
4575 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4576 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4577 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4578 }
4579 }
4580
4581 return SDValue();
4582}
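// Worked example (illustrative): (seteq (rotl %x, %n), 0) becomes
// (seteq %x, 0) - a rotate only permutes bits, so the result is all-zero
// (or all-ones) exactly when the input is.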
4583
4584static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4585 ISD::CondCode Cond, const SDLoc &dl,
4586 SelectionDAG &DAG) {
4587 // If we are testing for all-bits-clear, we might be able to do that with
4588 // less shifting since bit-order does not matter.
4589 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4590 return SDValue();
4591
4592 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4593 if (!C1 || !C1->isZero())
4594 return SDValue();
4595
4596 if (!N0.hasOneUse() ||
4597 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4598 return SDValue();
4599
4600 unsigned BitWidth = N0.getScalarValueSizeInBits();
4601 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4602 if (!ShAmtC)
4603 return SDValue();
4604
4605 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4606 if (ShAmt == 0)
4607 return SDValue();
4608
4609 // Canonicalize fshr as fshl to reduce pattern-matching.
4610 if (N0.getOpcode() == ISD::FSHR)
4611 ShAmt = BitWidth - ShAmt;
4612
4613 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4614 SDValue X, Y;
4615 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4616 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4617 return false;
4618 if (Or.getOperand(0) == Other) {
4619 X = Or.getOperand(0);
4620 Y = Or.getOperand(1);
4621 return true;
4622 }
4623 if (Or.getOperand(1) == Other) {
4624 X = Or.getOperand(1);
4625 Y = Or.getOperand(0);
4626 return true;
4627 }
4628 return false;
4629 };
4630
4631 EVT OpVT = N0.getValueType();
4632 EVT ShAmtVT = N0.getOperand(2).getValueType();
4633 SDValue F0 = N0.getOperand(0);
4634 SDValue F1 = N0.getOperand(1);
4635 if (matchOr(F0, F1)) {
4636 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4637 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4638 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4639 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4640 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4641 }
4642 if (matchOr(F1, F0)) {
4643 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4644 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4645 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4646 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4647 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4648 }
4649
4650 return SDValue();
4651}
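// Worked example (illustrative): (seteq (fshl (or %x, %y), %x, 5), 0) tests
// the same bits as (seteq (or (shl %y, 5), %x), 0), replacing the funnel
// shift with a plain shift of the operand that is not repeated.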
4652
4653/// Try to simplify a setcc built with the specified operands and cc. If it is
4654/// unable to simplify it, return a null SDValue.
4655SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4656 ISD::CondCode Cond, bool foldBooleans,
4657 DAGCombinerInfo &DCI,
4658 const SDLoc &dl) const {
4659 SelectionDAG &DAG = DCI.DAG;
4660 const DataLayout &Layout = DAG.getDataLayout();
4661 EVT OpVT = N0.getValueType();
4662 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4663
4664 // Constant fold or commute setcc.
4665 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4666 return Fold;
4667
4668 bool N0ConstOrSplat =
4669 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4670 bool N1ConstOrSplat =
4671 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4672
4673 // Canonicalize toward having the constant on the RHS.
4674 // TODO: Handle non-splat vector constants. All undef causes trouble.
4675 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4676 // infinite loop here when we encounter one.
4677 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4678 if (N0ConstOrSplat && !N1ConstOrSplat &&
4679 (DCI.isBeforeLegalizeOps() ||
4680 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4681 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4682
4683 // If we have a subtract with the same 2 non-constant operands as this setcc
4684 // -- but in reverse order -- then try to commute the operands of this setcc
4685 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4686 // instruction on some targets.
4687 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4688 (DCI.isBeforeLegalizeOps() ||
4689 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4690 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4691 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4692 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4693
4694 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4695 return V;
4696
4697 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4698 return V;
4699
4700 if (auto *N1C = isConstOrConstSplat(N1)) {
4701 const APInt &C1 = N1C->getAPIntValue();
4702
4703 // Optimize some CTPOP cases.
4704 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4705 return V;
4706
4707 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4708 // X * Y == 0 --> (X == 0) || (Y == 0)
4709 // X * Y != 0 --> (X != 0) && (Y != 0)
4710 // TODO: This bails out if minsize is set, but if the target doesn't have a
4711 // single instruction multiply for this type, it would likely be
4712 // smaller to decompose.
4713 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4714 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4715 (N0->getFlags().hasNoUnsignedWrap() ||
4716 N0->getFlags().hasNoSignedWrap()) &&
4717 !Attr.hasFnAttr(Attribute::MinSize)) {
4718 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4719 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4720 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4721 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4722 }
4723
4724 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4725 // equality comparison, then we're just comparing whether X itself is
4726 // zero.
4727 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4728 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4729 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4730 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4731 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4732 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4733 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4734 // (srl (ctlz x), 5) == 0 -> X != 0
4735 // (srl (ctlz x), 5) != 1 -> X != 0
4736 Cond = ISD::SETNE;
4737 } else {
4738 // (srl (ctlz x), 5) != 0 -> X == 0
4739 // (srl (ctlz x), 5) == 1 -> X == 0
4740 Cond = ISD::SETEQ;
4741 }
4742 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4743 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4744 Cond);
4745 }
4746 }
4747 }
4748 }
4749
4750 // FIXME: Support vectors.
4751 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4752 const APInt &C1 = N1C->getAPIntValue();
4753
4754 // (zext x) == C --> x == (trunc C)
4755 // (sext x) == C --> x == (trunc C)
4756 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4757 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4758 unsigned MinBits = N0.getValueSizeInBits();
4759 SDValue PreExt;
4760 bool Signed = false;
4761 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4762 // ZExt
4763 MinBits = N0->getOperand(0).getValueSizeInBits();
4764 PreExt = N0->getOperand(0);
4765 } else if (N0->getOpcode() == ISD::AND) {
4766 // DAGCombine turns costly ZExts into ANDs
4767 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4768 if ((C->getAPIntValue()+1).isPowerOf2()) {
4769 MinBits = C->getAPIntValue().countr_one();
4770 PreExt = N0->getOperand(0);
4771 }
4772 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4773 // SExt
4774 MinBits = N0->getOperand(0).getValueSizeInBits();
4775 PreExt = N0->getOperand(0);
4776 Signed = true;
4777 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4778 // ZEXTLOAD / SEXTLOAD
4779 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4780 MinBits = LN0->getMemoryVT().getSizeInBits();
4781 PreExt = N0;
4782 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4783 Signed = true;
4784 MinBits = LN0->getMemoryVT().getSizeInBits();
4785 PreExt = N0;
4786 }
4787 }
4788
4789 // Figure out how many bits we need to preserve this constant.
4790 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4791
4792 // Make sure we're not losing bits from the constant.
4793 if (MinBits > 0 &&
4794 MinBits < C1.getBitWidth() &&
4795 MinBits >= ReqdBits) {
4796 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4797 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4798 // Will get folded away.
4799 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4800 if (MinBits == 1 && C1 == 1)
4801 // Invert the condition.
4802 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
 4803 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
 4804 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4805 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4806 }
4807
4808 // If truncating the setcc operands is not desirable, we can still
4809 // simplify the expression in some cases:
4810 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4811 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4812 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4813 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4814 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4815 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4816 SDValue TopSetCC = N0->getOperand(0);
4817 unsigned N0Opc = N0->getOpcode();
4818 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4819 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4820 TopSetCC.getOpcode() == ISD::SETCC &&
4821 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4822 (isConstFalseVal(N1) ||
4823 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4824
4825 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4826 (!N1C->isZero() && Cond == ISD::SETNE);
4827
4828 if (!Inverse)
4829 return TopSetCC;
4830
 4831 ISD::CondCode InvCond = ISD::getSetCCInverse(
 4832 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4833 TopSetCC.getOperand(0).getValueType());
4834 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4835 TopSetCC.getOperand(1),
4836 InvCond);
4837 }
4838 }
4839 }
4840
4841 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4842 // equality or unsigned, and all 1 bits of the const are in the same
4843 // partial word, see if we can shorten the load.
4844 if (DCI.isBeforeLegalize() &&
 4845 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 4846 N0.getOpcode() == ISD::AND && C1 == 0 &&
4847 N0.getNode()->hasOneUse() &&
4848 isa<LoadSDNode>(N0.getOperand(0)) &&
4849 N0.getOperand(0).getNode()->hasOneUse() &&
 4850 isa<ConstantSDNode>(N0.getOperand(1))) {
 4851 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4852 APInt bestMask;
4853 unsigned bestWidth = 0, bestOffset = 0;
4854 if (Lod->isSimple() && Lod->isUnindexed() &&
4855 (Lod->getMemoryVT().isByteSized() ||
4856 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4857 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4858 unsigned origWidth = N0.getValueSizeInBits();
4859 unsigned maskWidth = origWidth;
 4860 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4861 // 8 bits, but have to be careful...
4862 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4863 origWidth = Lod->getMemoryVT().getSizeInBits();
4864 const APInt &Mask = N0.getConstantOperandAPInt(1);
 4865 // Only consider power-of-2 widths (and at least one byte) as candidates
4866 // for the narrowed load.
4867 for (unsigned width = 8; width < origWidth; width *= 2) {
4868 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4869 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4870 // Avoid accessing any padding here for now (we could use memWidth
4871 // instead of origWidth here otherwise).
4872 unsigned maxOffset = origWidth - width;
4873 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4874 if (Mask.isSubsetOf(newMask)) {
4875 unsigned ptrOffset =
4876 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4877 unsigned IsFast = 0;
4878 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4879 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
 4880 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
 4881 ptrOffset / 8) &&
 4882 allowsMemoryAccess(
 4883 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4884 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4885 IsFast) {
4886 bestOffset = ptrOffset / 8;
4887 bestMask = Mask.lshr(offset);
4888 bestWidth = width;
4889 break;
4890 }
4891 }
4892 newMask <<= 8;
4893 }
4894 if (bestWidth)
4895 break;
4896 }
4897 }
4898 if (bestWidth) {
4899 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4900 SDValue Ptr = Lod->getBasePtr();
4901 if (bestOffset != 0)
4902 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4903 SDValue NewLoad =
4904 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4905 Lod->getPointerInfo().getWithOffset(bestOffset),
4906 Lod->getBaseAlign());
4907 SDValue And =
4908 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4909 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4910 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4911 }
4912 }
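// Editorial example, not from the source: on a little-endian target,
// "((i32 load p) & 0x00FF0000) == 0" only inspects the byte at p+2, so the
// code above can replace it with an i8 load from p+2 masked with 0xFF,
// provided the narrower access is allowed and fast for the target
// (the allowsMemoryAccess / IsFast checks).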
4913
4914 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4915 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4916 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4917
4918 // If the comparison constant has bits in the upper part, the
4919 // zero-extended value could never match.
 4920 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
 4921 C1.getBitWidth() - InSize))) {
4922 switch (Cond) {
4923 case ISD::SETUGT:
4924 case ISD::SETUGE:
4925 case ISD::SETEQ:
4926 return DAG.getConstant(0, dl, VT);
4927 case ISD::SETULT:
4928 case ISD::SETULE:
4929 case ISD::SETNE:
4930 return DAG.getConstant(1, dl, VT);
4931 case ISD::SETGT:
4932 case ISD::SETGE:
4933 // True if the sign bit of C1 is set.
4934 return DAG.getConstant(C1.isNegative(), dl, VT);
4935 case ISD::SETLT:
4936 case ISD::SETLE:
4937 // True if the sign bit of C1 isn't set.
4938 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4939 default:
4940 break;
4941 }
4942 }
4943
4944 // Otherwise, we can perform the comparison with the low bits.
4945 switch (Cond) {
4946 case ISD::SETEQ:
4947 case ISD::SETNE:
4948 case ISD::SETUGT:
4949 case ISD::SETUGE:
4950 case ISD::SETULT:
4951 case ISD::SETULE: {
4952 EVT newVT = N0.getOperand(0).getValueType();
4953 // FIXME: Should use isNarrowingProfitable.
4954 if (DCI.isBeforeLegalizeOps() ||
4955 (isOperationLegal(ISD::SETCC, newVT) &&
4956 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
 4957 isTypeDesirableForOp(ISD::SETCC, newVT))) {
 4958 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4959 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4960
4961 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4962 NewConst, Cond);
4963 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4964 }
4965 break;
4966 }
4967 default:
4968 break; // todo, be more careful with signed comparisons
4969 }
4970 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4971 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 4972 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
 4973 OpVT)) {
4974 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4975 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4976 EVT ExtDstTy = N0.getValueType();
4977 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4978
4979 // If the constant doesn't fit into the number of bits for the source of
4980 // the sign extension, it is impossible for both sides to be equal.
4981 if (C1.getSignificantBits() > ExtSrcTyBits)
4982 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4983
4984 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4985 ExtDstTy != ExtSrcTy && "Unexpected types!");
4986 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4987 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4988 DAG.getConstant(Imm, dl, ExtDstTy));
4989 if (!DCI.isCalledByLegalizer())
4990 DCI.AddToWorklist(ZextOp.getNode());
4991 // Otherwise, make this a use of a zext.
4992 return DAG.getSetCC(dl, VT, ZextOp,
4993 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4994 } else if ((N1C->isZero() || N1C->isOne()) &&
4995 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4996 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4997 // excluded as they are handled below whilst checking for foldBooleans.
4998 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4999 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5000 (N0.getValueType() == MVT::i1 ||
 5001 getBooleanContents(N0.getValueType()) ==
 5002 ZeroOrOneBooleanContent)) {
 5004 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5005 if (TrueWhenTrue)
5006 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5007 // Invert the condition.
5008 if (N0.getOpcode() == ISD::SETCC) {
 5009 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
 5010 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
 5011 if (DCI.isBeforeLegalizeOps() ||
 5012 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5013 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5014 }
5015 }
5016
5017 if ((N0.getOpcode() == ISD::XOR ||
5018 (N0.getOpcode() == ISD::AND &&
5019 N0.getOperand(0).getOpcode() == ISD::XOR &&
5020 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5021 isOneConstant(N0.getOperand(1))) {
5022 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5023 // can only do this if the top bits are known zero.
5024 unsigned BitWidth = N0.getValueSizeInBits();
5025 if (DAG.MaskedValueIsZero(N0,
 5026 APInt::getHighBitsSet(BitWidth,
 5027 BitWidth-1))) {
5028 // Okay, get the un-inverted input value.
5029 SDValue Val;
5030 if (N0.getOpcode() == ISD::XOR) {
5031 Val = N0.getOperand(0);
5032 } else {
5033 assert(N0.getOpcode() == ISD::AND &&
5034 N0.getOperand(0).getOpcode() == ISD::XOR);
5035 // ((X^1)&1)^1 -> X & 1
5036 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5037 N0.getOperand(0).getOperand(0),
5038 N0.getOperand(1));
5039 }
5040
5041 return DAG.getSetCC(dl, VT, Val, N1,
 5042 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
 5043 }
5044 } else if (N1C->isOne()) {
5045 SDValue Op0 = N0;
5046 if (Op0.getOpcode() == ISD::TRUNCATE)
5047 Op0 = Op0.getOperand(0);
5048
5049 if ((Op0.getOpcode() == ISD::XOR) &&
5050 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5051 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5052 SDValue XorLHS = Op0.getOperand(0);
5053 SDValue XorRHS = Op0.getOperand(1);
5054 // Ensure that the input setccs return an i1 type or 0/1 value.
5055 if (Op0.getValueType() == MVT::i1 ||
 5056 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
 5057 ZeroOrOneBooleanContent &&
 5058 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
 5059 ZeroOrOneBooleanContent)) {
 5060 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
 5061 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
 5062 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5063 }
5064 }
5065 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5066 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5067 if (Op0.getValueType().bitsGT(VT))
5068 Op0 = DAG.getNode(ISD::AND, dl, VT,
5069 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5070 DAG.getConstant(1, dl, VT));
5071 else if (Op0.getValueType().bitsLT(VT))
5072 Op0 = DAG.getNode(ISD::AND, dl, VT,
5073 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5074 DAG.getConstant(1, dl, VT));
5075
5076 return DAG.getSetCC(dl, VT, Op0,
5077 DAG.getConstant(0, dl, Op0.getValueType()),
 5078 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
 5079 }
5080 if (Op0.getOpcode() == ISD::AssertZext &&
5081 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5082 return DAG.getSetCC(dl, VT, Op0,
5083 DAG.getConstant(0, dl, Op0.getValueType()),
 5084 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
 5085 }
5086 }
5087
5088 // Given:
5089 // icmp eq/ne (urem %x, %y), 0
5090 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5091 // icmp eq/ne %x, 0
5092 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5093 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5094 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5095 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5096 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5097 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5098 }
5099
5100 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5101 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5102 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 5103 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
 5104 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5105 N1C->isAllOnes()) {
5106 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5107 DAG.getConstant(0, dl, OpVT),
 5108 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
 5109 }
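// Editorial example, not from the source: for i32, (x sra 31) replicates the
// sign bit, giving -1 for negative x and 0 otherwise. So "(x sra 31) == -1"
// is exactly "x <s 0" and "(x sra 31) != -1" is "x >=s 0", which is the
// seteq->setlt / setne->setge rewrite just above.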
5110
5111 // fold (setcc (trunc x) c) -> (setcc x c)
5112 if (N0.getOpcode() == ISD::TRUNCATE &&
 5113 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
 5114 (N0->getFlags().hasNoSignedWrap() &&
 5115 !ISD::isUnsignedIntSetCC(Cond))) &&
 5116 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
 5117 EVT NewVT = N0.getOperand(0).getValueType();
5118 SDValue NewConst = DAG.getConstant(
 5119 ISD::isSignedIntSetCC(Cond)
 5120 ? C1.sext(NewVT.getSizeInBits())
5121 : C1.zext(NewVT.getSizeInBits()),
5122 dl, NewVT);
5123 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5124 }
5125
5126 if (SDValue V =
5127 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5128 return V;
5129 }
5130
5131 // These simplifications apply to splat vectors as well.
5132 // TODO: Handle more splat vector cases.
5133 if (auto *N1C = isConstOrConstSplat(N1)) {
5134 const APInt &C1 = N1C->getAPIntValue();
5135
5136 APInt MinVal, MaxVal;
5137 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
 5138 if (ISD::isSignedIntSetCC(Cond)) {
 5139 MinVal = APInt::getSignedMinValue(OperandBitSize);
5140 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5141 } else {
5142 MinVal = APInt::getMinValue(OperandBitSize);
5143 MaxVal = APInt::getMaxValue(OperandBitSize);
5144 }
5145
5146 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5147 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5148 // X >= MIN --> true
5149 if (C1 == MinVal)
5150 return DAG.getBoolConstant(true, dl, VT, OpVT);
5151
5152 if (!VT.isVector()) { // TODO: Support this for vectors.
5153 // X >= C0 --> X > (C0 - 1)
5154 APInt C = C1 - 1;
 5155 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
 5156 if ((DCI.isBeforeLegalizeOps() ||
5157 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5158 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5159 isLegalICmpImmediate(C.getSExtValue())))) {
5160 return DAG.getSetCC(dl, VT, N0,
5161 DAG.getConstant(C, dl, N1.getValueType()),
5162 NewCC);
5163 }
5164 }
5165 }
5166
5167 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5168 // X <= MAX --> true
5169 if (C1 == MaxVal)
5170 return DAG.getBoolConstant(true, dl, VT, OpVT);
5171
5172 // X <= C0 --> X < (C0 + 1)
5173 if (!VT.isVector()) { // TODO: Support this for vectors.
5174 APInt C = C1 + 1;
 5175 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
 5176 if ((DCI.isBeforeLegalizeOps() ||
5177 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5178 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5179 isLegalICmpImmediate(C.getSExtValue())))) {
5180 return DAG.getSetCC(dl, VT, N0,
5181 DAG.getConstant(C, dl, N1.getValueType()),
5182 NewCC);
5183 }
5184 }
5185 }
5186
5187 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5188 if (C1 == MinVal)
5189 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5190
5191 // TODO: Support this for vectors after legalize ops.
5192 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5193 // Canonicalize setlt X, Max --> setne X, Max
5194 if (C1 == MaxVal)
5195 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5196
5197 // If we have setult X, 1, turn it into seteq X, 0
5198 if (C1 == MinVal+1)
5199 return DAG.getSetCC(dl, VT, N0,
5200 DAG.getConstant(MinVal, dl, N0.getValueType()),
5201 ISD::SETEQ);
5202 }
5203 }
5204
5205 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5206 if (C1 == MaxVal)
5207 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5208
5209 // TODO: Support this for vectors after legalize ops.
5210 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5211 // Canonicalize setgt X, Min --> setne X, Min
5212 if (C1 == MinVal)
5213 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5214
5215 // If we have setugt X, Max-1, turn it into seteq X, Max
5216 if (C1 == MaxVal-1)
5217 return DAG.getSetCC(dl, VT, N0,
5218 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5219 ISD::SETEQ);
5220 }
5221 }
5222
5223 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5224 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5225 if (C1.isZero())
5226 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5227 VT, N0, N1, Cond, DCI, dl))
5228 return CC;
5229
5230 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5231 // For example, when high 32-bits of i64 X are known clear:
5232 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5233 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5234 bool CmpZero = N1C->isZero();
5235 bool CmpNegOne = N1C->isAllOnes();
5236 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5237 // Match or(lo,shl(hi,bw/2)) pattern.
5238 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5239 unsigned EltBits = V.getScalarValueSizeInBits();
5240 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5241 return false;
5242 SDValue LHS = V.getOperand(0);
5243 SDValue RHS = V.getOperand(1);
5244 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5245 // Unshifted element must have zero upperbits.
5246 if (RHS.getOpcode() == ISD::SHL &&
5247 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5248 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5249 DAG.MaskedValueIsZero(LHS, HiBits)) {
5250 Lo = LHS;
5251 Hi = RHS.getOperand(0);
5252 return true;
5253 }
5254 if (LHS.getOpcode() == ISD::SHL &&
5255 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5256 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5257 DAG.MaskedValueIsZero(RHS, HiBits)) {
5258 Lo = RHS;
5259 Hi = LHS.getOperand(0);
5260 return true;
5261 }
5262 return false;
5263 };
5264
5265 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5266 unsigned EltBits = N0.getScalarValueSizeInBits();
5267 unsigned HalfBits = EltBits / 2;
5268 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5269 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5270 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5271 SDValue NewN0 =
5272 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5273 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5274 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5275 };
5276
5277 SDValue Lo, Hi;
5278 if (IsConcat(N0, Lo, Hi))
5279 return MergeConcat(Lo, Hi);
5280
5281 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5282 SDValue Lo0, Lo1, Hi0, Hi1;
5283 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5284 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5285 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5286 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5287 }
5288 }
5289 }
5290 }
5291
5292 // If we have "setcc X, C0", check to see if we can shrink the immediate
5293 // by changing cc.
5294 // TODO: Support this for vectors after legalize ops.
5295 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5296 // SETUGT X, SINTMAX -> SETLT X, 0
5297 // SETUGE X, SINTMIN -> SETLT X, 0
5298 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5299 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5300 return DAG.getSetCC(dl, VT, N0,
5301 DAG.getConstant(0, dl, N1.getValueType()),
5302 ISD::SETLT);
5303
5304 // SETULT X, SINTMIN -> SETGT X, -1
5305 // SETULE X, SINTMAX -> SETGT X, -1
5306 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5307 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5308 return DAG.getSetCC(dl, VT, N0,
5309 DAG.getAllOnesConstant(dl, N1.getValueType()),
5310 ISD::SETGT);
5311 }
5312 }
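// Editorial example, not from the source: for i32, SINTMAX is 0x7FFFFFFF, so
// "X >u 0x7FFFFFFF" holds exactly when the sign bit of X is set, i.e.
// "X <s 0"; the SETULT/SETULE cases are the mirrored test "X >s -1". Both
// replacements drop a wide immediate in favor of a sign test.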
5313
5314 // Back to non-vector simplifications.
5315 // TODO: Can we do these for vector splats?
5316 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5317 const APInt &C1 = N1C->getAPIntValue();
5318 EVT ShValTy = N0.getValueType();
5319
5320 // Fold bit comparisons when we can. This will result in an
5321 // incorrect value when boolean false is negative one, unless
5322 // the bitsize is 1 in which case the false value is the same
5323 // in practice regardless of the representation.
5324 if ((VT.getSizeInBits() == 1 ||
 5325 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
 5326 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5327 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5328 N0.getOpcode() == ISD::AND) {
5329 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5330 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5331 // Perform the xform if the AND RHS is a single bit.
5332 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5333 if (AndRHS->getAPIntValue().isPowerOf2() &&
5334 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5335 return DAG.getNode(
5336 ISD::TRUNCATE, dl, VT,
5337 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5338 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5339 }
5340 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5341 // (X & 8) == 8 --> (X & 8) >> 3
5342 // Perform the xform if C1 is a single bit.
5343 unsigned ShCt = C1.logBase2();
5344 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5345 return DAG.getNode(
5346 ISD::TRUNCATE, dl, VT,
5347 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5348 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5349 }
5350 }
5351 }
5352 }
5353
5354 if (C1.getSignificantBits() <= 64 &&
 5355 !isLegalICmpImmediate(C1.getSExtValue())) {
 5356 // (X & -256) == 256 -> (X >> 8) == 1
5357 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5358 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5359 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5360 const APInt &AndRHSC = AndRHS->getAPIntValue();
5361 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5362 unsigned ShiftBits = AndRHSC.countr_zero();
5363 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5364 // If using an unsigned shift doesn't yield a legal compare
5365 // immediate, try using sra instead.
5366 APInt NewC = C1.lshr(ShiftBits);
5367 if (NewC.getSignificantBits() <= 64 &&
 5368 !isLegalICmpImmediate(NewC.getSExtValue())) {
 5369 APInt SignedC = C1.ashr(ShiftBits);
5370 if (SignedC.getSignificantBits() <= 64 &&
 5371 isLegalICmpImmediate(SignedC.getSExtValue())) {
 5372 SDValue Shift = DAG.getNode(
5373 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5374 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5375 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5376 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5377 }
5378 }
5379 SDValue Shift = DAG.getNode(
5380 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5381 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5382 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5383 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5384 }
5385 }
5386 }
5387 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5388 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5389 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5390 // X < 0x100000000 -> (X >> 32) < 1
5391 // X >= 0x100000000 -> (X >> 32) >= 1
5392 // X <= 0x0ffffffff -> (X >> 32) < 1
5393 // X > 0x0ffffffff -> (X >> 32) >= 1
5394 unsigned ShiftBits;
5395 APInt NewC = C1;
5396 ISD::CondCode NewCond = Cond;
5397 if (AdjOne) {
5398 ShiftBits = C1.countr_one();
5399 NewC = NewC + 1;
5400 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5401 } else {
5402 ShiftBits = C1.countr_zero();
5403 }
5404 NewC.lshrInPlace(ShiftBits);
5405 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
 5406 isLegalICmpImmediate(NewC.getSExtValue()) &&
 5407 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5408 SDValue Shift =
5409 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5410 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5411 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5412 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5413 }
5414 }
5415 }
5416 }
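// Editorial example, not from the source: on a 64-bit type where the
// immediate 0x100000000 does not fit a compare instruction,
// "X <u 0x100000000" becomes "(X >> 32) <u 1", i.e. "(X >> 32) == 0",
// trading the unencodable constant for a shift, per the AdjOne logic above.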
5417
 5418 if (isa<ConstantFPSDNode>(N1.getNode())) {
 5419 auto *CFP = cast<ConstantFPSDNode>(N1);
5420 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5421
5422 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5423 // constant if knowing that the operand is non-nan is enough. We prefer to
5424 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5425 // materialize 0.0.
5426 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5427 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5428
5429 // setcc (fneg x), C -> setcc swap(pred) x, -C
5430 if (N0.getOpcode() == ISD::FNEG) {
 5431 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
 5432 if (DCI.isBeforeLegalizeOps() ||
5433 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5434 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5435 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5436 }
5437 }
5438
5439 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
 5440 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
 5441 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5442 bool IsFabs = N0.getOpcode() == ISD::FABS;
5443 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5444 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5445 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5446 : (IsFabs ? fcInf : fcPosInf);
5447 if (Cond == ISD::SETUEQ)
5448 Flag |= fcNan;
5449 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5450 DAG.getTargetConstant(Flag, dl, MVT::i32));
5451 }
5452 }
5453
5454 // If the condition is not legal, see if we can find an equivalent one
5455 // which is legal.
 5456 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
 5457 // If the comparison was an awkward floating-point == or != and one of
5458 // the comparison operands is infinity or negative infinity, convert the
5459 // condition to a less-awkward <= or >=.
5460 if (CFP->getValueAPF().isInfinity()) {
5461 bool IsNegInf = CFP->getValueAPF().isNegative();
 5462 ISD::CondCode NewCond = ISD::SETCC_INVALID;
 5463 switch (Cond) {
5464 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5465 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5466 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5467 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5468 default: break;
5469 }
5470 if (NewCond != ISD::SETCC_INVALID &&
5471 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5472 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5473 }
5474 }
5475 }
5476
5477 if (N0 == N1) {
5478 // The sext(setcc()) => setcc() optimization relies on the appropriate
5479 // constant being emitted.
5480 assert(!N0.getValueType().isInteger() &&
5481 "Integer types should be handled by FoldSetCC");
5482
5483 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5484 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5485 if (UOF == 2) // FP operators that are undefined on NaNs.
5486 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5487 if (UOF == unsigned(EqTrue))
5488 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5489 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5490 // if it is not already.
5491 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5492 if (NewCond != Cond &&
5493 (DCI.isBeforeLegalizeOps() ||
5494 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5495 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5496 }
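// Editorial example, not from the source: "setcc x, x, setoeq" cannot fold to
// a constant because it is false when x is NaN; with UOF == 0 it is
// simplified to "setcc x, x, seto", an ordered (not-NaN) test that needs no
// second operand.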
5497
5498 // ~X > ~Y --> Y > X
5499 // ~X < ~Y --> Y < X
5500 // ~X < C --> X > ~C
5501 // ~X > C --> X < ~C
5502 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5503 N0.getValueType().isInteger()) {
5504 if (isBitwiseNot(N0)) {
5505 if (isBitwiseNot(N1))
5506 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5507
 5508 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
 5509 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
 5510 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5511 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5512 }
5513 }
5514 }
5515
5516 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5517 N0.getValueType().isInteger()) {
5518 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5519 N0.getOpcode() == ISD::XOR) {
5520 // Simplify (X+Y) == (X+Z) --> Y == Z
5521 if (N0.getOpcode() == N1.getOpcode()) {
5522 if (N0.getOperand(0) == N1.getOperand(0))
5523 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5524 if (N0.getOperand(1) == N1.getOperand(1))
5525 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5526 if (isCommutativeBinOp(N0.getOpcode())) {
5527 // If X op Y == Y op X, try other combinations.
5528 if (N0.getOperand(0) == N1.getOperand(1))
5529 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5530 Cond);
5531 if (N0.getOperand(1) == N1.getOperand(0))
5532 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5533 Cond);
5534 }
5535 }
5536
5537 // If RHS is a legal immediate value for a compare instruction, we need
5538 // to be careful about increasing register pressure needlessly.
5539 bool LegalRHSImm = false;
5540
5541 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5542 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5543 // Turn (X+C1) == C2 --> X == C2-C1
5544 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5545 return DAG.getSetCC(
5546 dl, VT, N0.getOperand(0),
5547 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5548 dl, N0.getValueType()),
5549 Cond);
5550
5551 // Turn (X^C1) == C2 --> X == C1^C2
5552 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5553 return DAG.getSetCC(
5554 dl, VT, N0.getOperand(0),
5555 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5556 dl, N0.getValueType()),
5557 Cond);
5558 }
5559
5560 // Turn (C1-X) == C2 --> X == C1-C2
5561 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5562 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5563 return DAG.getSetCC(
5564 dl, VT, N0.getOperand(1),
5565 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5566 dl, N0.getValueType()),
5567 Cond);
5568
5569 // Could RHSC fold directly into a compare?
5570 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5571 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5572 }
5573
5574 // (X+Y) == X --> Y == 0 and similar folds.
5575 // Don't do this if X is an immediate that can fold into a cmp
5576 // instruction and X+Y has other uses. It could be an induction variable
5577 // chain, and the transform would increase register pressure.
5578 if (!LegalRHSImm || N0.hasOneUse())
5579 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5580 return V;
5581 }
5582
5583 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5584 N1.getOpcode() == ISD::XOR)
5585 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5586 return V;
5587
5588 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5589 return V;
5590
5591 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5592 return V;
5593 }
5594
5595 // Fold remainder of division by a constant.
5596 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5597 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5598 // When division is cheap or optimizing for minimum size,
5599 // fall through to DIVREM creation by skipping this fold.
5600 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5601 if (N0.getOpcode() == ISD::UREM) {
5602 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5603 return Folded;
5604 } else if (N0.getOpcode() == ISD::SREM) {
5605 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5606 return Folded;
5607 }
5608 }
5609 }
5610
5611 // Fold away ALL boolean setcc's.
5612 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5613 SDValue Temp;
5614 switch (Cond) {
5615 default: llvm_unreachable("Unknown integer setcc!");
5616 case ISD::SETEQ: // X == Y -> ~(X^Y)
5617 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5618 N0 = DAG.getNOT(dl, Temp, OpVT);
5619 if (!DCI.isCalledByLegalizer())
5620 DCI.AddToWorklist(Temp.getNode());
5621 break;
5622 case ISD::SETNE: // X != Y --> (X^Y)
5623 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5624 break;
5625 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5626 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5627 Temp = DAG.getNOT(dl, N0, OpVT);
5628 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5629 if (!DCI.isCalledByLegalizer())
5630 DCI.AddToWorklist(Temp.getNode());
5631 break;
5632 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5633 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5634 Temp = DAG.getNOT(dl, N1, OpVT);
5635 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5636 if (!DCI.isCalledByLegalizer())
5637 DCI.AddToWorklist(Temp.getNode());
5638 break;
5639 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5640 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5641 Temp = DAG.getNOT(dl, N0, OpVT);
5642 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5643 if (!DCI.isCalledByLegalizer())
5644 DCI.AddToWorklist(Temp.getNode());
5645 break;
5646 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5647 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5648 Temp = DAG.getNOT(dl, N1, OpVT);
5649 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5650 break;
5651 }
5652 if (VT.getScalarType() != MVT::i1) {
5653 if (!DCI.isCalledByLegalizer())
5654 DCI.AddToWorklist(N0.getNode());
5655 // FIXME: If running after legalize, we probably can't do this.
 5656 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
 5657 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5658 }
5659 return N0;
5660 }
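// Editorial check, not from the source: on i1 the signed values are 0 and -1
// (bit pattern 1), so "X >s Y" holds only for X = 0, Y = -1, and the bitwise
// form ~X & Y is 1 in exactly that case -- the SETGT/SETULT row above. The
// remaining rows of the table verify the same way.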
5661
5662 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5663 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5664 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
 5665 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
 5666 N1->getFlags().hasNoUnsignedWrap()) ||
 5667 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
 5668 N1->getFlags().hasNoSignedWrap())) &&
 5669 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
 5670 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5671 }
5672
5673 // Could not fold it.
5674 return SDValue();
5675}
5676
5677/// Returns true (and the GlobalValue and the offset) if the node is a
5678/// GlobalAddress + offset.
5679bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
 5680 int64_t &Offset) const {
5681
5682 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5683
5684 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5685 GA = GASD->getGlobal();
5686 Offset += GASD->getOffset();
5687 return true;
5688 }
5689
5690 if (N->isAnyAdd()) {
5691 SDValue N1 = N->getOperand(0);
5692 SDValue N2 = N->getOperand(1);
5693 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5694 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5695 Offset += V->getSExtValue();
5696 return true;
5697 }
5698 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5699 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5700 Offset += V->getSExtValue();
5701 return true;
5702 }
5703 }
5704 }
5705
5706 return false;
5707}
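// Editorial example, not from the source: for a DAG shaped like
// (add (add (GlobalAddress @g, +8), 4), 16), the recursion above peels one
// constant operand per level and accumulates into Offset, returning GA = @g
// and Offset = 28.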
5708
5709SDValue TargetLowering::PerformDAGCombine(SDNode *N,
 5710 DAGCombinerInfo &DCI) const {
5711 // Default implementation: no optimization.
5712 return SDValue();
5713}
5714
5715//===----------------------------------------------------------------------===//
5716// Inline Assembler Implementation Methods
5717//===----------------------------------------------------------------------===//
5718
5719TargetLowering::ConstraintType
5720TargetLowering::getConstraintType(StringRef Constraint) const {
 5721 unsigned S = Constraint.size();
5722
5723 if (S == 1) {
5724 switch (Constraint[0]) {
5725 default: break;
5726 case 'r':
5727 return C_RegisterClass;
5728 case 'm': // memory
5729 case 'o': // offsetable
5730 case 'V': // not offsetable
5731 return C_Memory;
5732 case 'p': // Address.
5733 return C_Address;
5734 case 'n': // Simple Integer
5735 case 'E': // Floating Point Constant
5736 case 'F': // Floating Point Constant
5737 return C_Immediate;
5738 case 'i': // Simple Integer or Relocatable Constant
5739 case 's': // Relocatable Constant
5740 case 'X': // Allow ANY value.
5741 case 'I': // Target registers.
5742 case 'J':
5743 case 'K':
5744 case 'L':
5745 case 'M':
5746 case 'N':
5747 case 'O':
5748 case 'P':
5749 case '<':
5750 case '>':
5751 return C_Other;
5752 }
5753 }
5754
5755 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5756 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5757 return C_Memory;
5758 return C_Register;
5759 }
5760 return C_Unknown;
5761}
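// Editorial example, not from the source: with the table above, "r" maps to
// C_RegisterClass; "m", "o" and "V" to C_Memory; "p" to C_Address; "n", "E"
// and "F" to C_Immediate; "i", "s", "X" and the target letters to C_Other;
// "{eax}" to C_Register; and "{memory}" is special-cased to C_Memory.
// Anything else is C_Unknown.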
5762
5763/// Try to replace an X constraint, which matches anything, with another that
5764/// has more specific requirements based on the type of the corresponding
5765/// operand.
5766const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5767 if (ConstraintVT.isInteger())
5768 return "r";
5769 if (ConstraintVT.isFloatingPoint())
5770 return "f"; // works for many targets
5771 return nullptr;
5772}
5773
5774SDValue TargetLowering::LowerAsmOutputForConstraint(
 5775 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5776 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5777 return SDValue();
5778}
5779
5780/// Lower the specified operand into the Ops vector.
5781/// If it is invalid, don't add anything to Ops.
5782void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 5783 StringRef Constraint,
5784 std::vector<SDValue> &Ops,
5785 SelectionDAG &DAG) const {
5786
5787 if (Constraint.size() > 1)
5788 return;
5789
5790 char ConstraintLetter = Constraint[0];
5791 switch (ConstraintLetter) {
5792 default: break;
5793 case 'X': // Allows any operand
5794 case 'i': // Simple Integer or Relocatable Constant
5795 case 'n': // Simple Integer
5796 case 's': { // Relocatable Constant
5797
 5798 ConstantSDNode *C;
 5799 uint64_t Offset = 0;
5800
5801 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
 5802 // etc., since getelementptr is variadic. We can't use
 5803 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
 5804 // at the root, while in this case the GA may be furthest from the root node,
 5805 // which is likely an ISD::ADD.
5806 while (true) {
5807 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5808 // gcc prints these as sign extended. Sign extend value to 64 bits
5809 // now; without this it would get ZExt'd later in
5810 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5811 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5812 BooleanContent BCont = getBooleanContents(MVT::i64);
5813 ISD::NodeType ExtOpc =
5814 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5815 int64_t ExtVal =
5816 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5817 Ops.push_back(
5818 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5819 return;
5820 }
5821 if (ConstraintLetter != 'n') {
5822 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5823 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5824 GA->getValueType(0),
5825 Offset + GA->getOffset()));
5826 return;
5827 }
5828 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5829 Ops.push_back(DAG.getTargetBlockAddress(
5830 BA->getBlockAddress(), BA->getValueType(0),
5831 Offset + BA->getOffset(), BA->getTargetFlags()));
5832 return;
5833 }
 5834 if (isa<BasicBlockSDNode>(Op)) {
 5835 Ops.push_back(Op);
5836 return;
5837 }
5838 }
5839 const unsigned OpCode = Op.getOpcode();
5840 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5841 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5842 Op = Op.getOperand(1);
5843 // Subtraction is not commutative.
5844 else if (OpCode == ISD::ADD &&
5845 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5846 Op = Op.getOperand(0);
5847 else
5848 return;
5849 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5850 continue;
5851 }
5852 return;
5853 }
5854 break;
5855 }
5856 }
5857}
5858
5859void TargetLowering::CollectTargetIntrinsicOperands(
 5860 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5861}
5862
5863std::pair<unsigned, const TargetRegisterClass *>
5864TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
 5865 StringRef Constraint,
5866 MVT VT) const {
5867 if (!Constraint.starts_with("{"))
5868 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5869 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5870
5871 // Remove the braces from around the name.
5872 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5873
5874 std::pair<unsigned, const TargetRegisterClass *> R =
5875 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5876
5877 // Figure out which register class contains this reg.
5878 for (const TargetRegisterClass *RC : RI->regclasses()) {
5879 // If none of the value types for this register class are valid, we
5880 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5881 if (!isLegalRC(*RI, *RC))
5882 continue;
5883
5884 for (const MCPhysReg &PR : *RC) {
5885 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5886 std::pair<unsigned, const TargetRegisterClass *> S =
5887 std::make_pair(PR, RC);
5888
5889 // If this register class has the requested value type, return it,
5890 // otherwise keep searching and return the first class found
5891 // if no other is found which explicitly has the requested type.
5892 if (RI->isTypeLegalForClass(*RC, VT))
5893 return S;
5894 if (!R.second)
5895 R = S;
5896 }
5897 }
5898 }
5899
5900 return R;
5901}
5902
5903//===----------------------------------------------------------------------===//
5904// Constraint Selection.
5905
5906/// Return true if this is an input operand that is a matching constraint like
5907/// "4".
5908bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
 5909 assert(!ConstraintCode.empty() && "No known constraint!");
5910 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5911}
5912
5913/// If this is an input matching constraint, this method returns the output
5914/// operand it matches.
5915unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
 5916 assert(!ConstraintCode.empty() && "No known constraint!");
5917 return atoi(ConstraintCode.c_str());
5918}
5919
5920/// Split up the constraint string from the inline assembly value into the
5921/// specific constraints and their prefixes, and also tie in the associated
5922/// operand values.
5923/// If this returns an empty vector, and if the constraint string itself
5924/// isn't empty, there was an error parsing.
5925TargetLowering::AsmOperandInfoVector
5926TargetLowering::ParseConstraints(const DataLayout &DL,
 5927 const TargetRegisterInfo *TRI,
5928 const CallBase &Call) const {
5929 /// Information about all of the constraints.
5930 AsmOperandInfoVector ConstraintOperands;
5931 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5932 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5933
5934 // Do a prepass over the constraints, canonicalizing them, and building up the
5935 // ConstraintOperands list.
5936 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5937 unsigned ResNo = 0; // ResNo - The result number of the next output.
5938 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5939
5940 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5941 ConstraintOperands.emplace_back(std::move(CI));
5942 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5943
5944 // Update multiple alternative constraint count.
5945 if (OpInfo.multipleAlternatives.size() > maCount)
5946 maCount = OpInfo.multipleAlternatives.size();
5947
5948 OpInfo.ConstraintVT = MVT::Other;
5949
5950 // Compute the value type for each operand.
5951 switch (OpInfo.Type) {
 5952 case InlineAsm::isOutput:
 5953 // Indirect outputs just consume an argument.
5954 if (OpInfo.isIndirect) {
5955 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5956 break;
5957 }
5958
5959 // The return value of the call is this value. As such, there is no
5960 // corresponding argument.
5961 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5962 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5963 OpInfo.ConstraintVT =
5964 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5965 .getSimpleVT();
5966 } else {
5967 assert(ResNo == 0 && "Asm only has one result!");
5968 OpInfo.ConstraintVT =
 5969 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
 5970 }
5971 ++ResNo;
5972 break;
5973 case InlineAsm::isInput:
5974 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5975 break;
5976 case InlineAsm::isLabel:
5977 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5978 ++LabelNo;
5979 continue;
 5980 case InlineAsm::isClobber:
 5981 // Nothing to do.
5982 break;
5983 }
5984
5985 if (OpInfo.CallOperandVal) {
5986 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5987 if (OpInfo.isIndirect) {
5988 OpTy = Call.getParamElementType(ArgNo);
5989 assert(OpTy && "Indirect operand must have elementtype attribute");
5990 }
5991
5992 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5993 if (StructType *STy = dyn_cast<StructType>(OpTy))
5994 if (STy->getNumElements() == 1)
5995 OpTy = STy->getElementType(0);
5996
5997 // If OpTy is not a single value, it may be a struct/union that we
5998 // can tile with integers.
5999 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6000 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6001 switch (BitSize) {
6002 default: break;
6003 case 1:
6004 case 8:
6005 case 16:
6006 case 32:
6007 case 64:
6008 case 128:
6009 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6010 break;
6011 }
6012 }
6013
6014 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6015 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6016 ArgNo++;
6017 }
6018 }
6019
6020 // If we have multiple alternative constraints, select the best alternative.
6021 if (!ConstraintOperands.empty()) {
6022 if (maCount) {
6023 unsigned bestMAIndex = 0;
6024 int bestWeight = -1;
6025 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6026 int weight = -1;
6027 unsigned maIndex;
6028 // Compute the sums of the weights for each alternative, keeping track
6029 // of the best (highest weight) one so far.
6030 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6031 int weightSum = 0;
6032 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6033 cIndex != eIndex; ++cIndex) {
6034 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6035 if (OpInfo.Type == InlineAsm::isClobber)
6036 continue;
6037
6038 // If this is an output operand with a matching input operand,
6039 // look up the matching input. If their types mismatch, e.g. one
6040 // is an integer, the other is floating point, or their sizes are
6041 // different, flag it as an maCantMatch.
6042 if (OpInfo.hasMatchingInput()) {
6043 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6044 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6045 if ((OpInfo.ConstraintVT.isInteger() !=
6046 Input.ConstraintVT.isInteger()) ||
6047 (OpInfo.ConstraintVT.getSizeInBits() !=
6048 Input.ConstraintVT.getSizeInBits())) {
6049 weightSum = -1; // Can't match.
6050 break;
6051 }
6052 }
6053 }
6054 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6055 if (weight == -1) {
6056 weightSum = -1;
6057 break;
6058 }
6059 weightSum += weight;
6060 }
6061 // Update best.
6062 if (weightSum > bestWeight) {
6063 bestWeight = weightSum;
6064 bestMAIndex = maIndex;
6065 }
6066 }
6067
6068 // Now select chosen alternative in each constraint.
6069 for (AsmOperandInfo &cInfo : ConstraintOperands)
6070 if (cInfo.Type != InlineAsm::isClobber)
6071 cInfo.selectAlternative(bestMAIndex);
6072 }
6073 }
6074
6075 // Check and hook up tied operands, choose constraint code to use.
6076 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6077 cIndex != eIndex; ++cIndex) {
6078 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6079
6080 // If this is an output operand with a matching input operand, look up the
6081 // matching input. If their types mismatch, e.g. one is an integer, the
6082 // other is floating point, or their sizes are different, flag it as an
6083 // error.
6084 if (OpInfo.hasMatchingInput()) {
6085 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6086
6087 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6088 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6089 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6090 OpInfo.ConstraintVT);
6091 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6092 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6093 Input.ConstraintVT);
6094 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6095 OpInfo.ConstraintVT.isFloatingPoint();
6096 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6097 Input.ConstraintVT.isFloatingPoint();
6098 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6099 (MatchRC.second != InputRC.second)) {
6100 report_fatal_error("Unsupported asm: input constraint"
6101 " with a matching output constraint of"
6102 " incompatible type!");
6103 }
6104 }
6105 }
6106 }
6107
6108 return ConstraintOperands;
6109}
6110
6111/// Return a number indicating our preference for choosing a type of constraint
6112/// over another, for the purpose of sorting them. Immediates are almost always
6113/// preferable (when they can be emitted). A higher return value means a
6114/// stronger preference for one constraint type relative to another.
6115/// FIXME: We should prefer registers over memory but doing so may lead to
6116/// unrecoverable register exhaustion later.
6117/// https://github.com/llvm/llvm-project/issues/20571
6118static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
 6119 switch (CT) {
 6120 case TargetLowering::C_Immediate:
 6121 case TargetLowering::C_Other:
 6122 return 4;
 6123 case TargetLowering::C_Memory:
 6124 case TargetLowering::C_Address:
 6125 return 3;
 6126 case TargetLowering::C_RegisterClass:
 6127 return 2;
 6128 case TargetLowering::C_Register:
 6129 return 1;
 6130 case TargetLowering::C_Unknown:
 6131 return 0;
6132 }
6133 llvm_unreachable("Invalid constraint type");
6134}
6135
6136/// Examine constraint type and operand type and determine a weight value.
6137/// This object must already have been set up with the operand type
6138/// and the current alternative constraint selected.
6139TargetLowering::ConstraintWeight
6140 TargetLowering::getMultipleConstraintMatchWeight(
 6141 AsmOperandInfo &info, int maIndex) const {
 6142 InlineAsm::ConstraintCodeVector *rCodes;
6143 if (maIndex >= (int)info.multipleAlternatives.size())
6144 rCodes = &info.Codes;
6145 else
6146 rCodes = &info.multipleAlternatives[maIndex].Codes;
6147 ConstraintWeight BestWeight = CW_Invalid;
6148
6149 // Loop over the options, keeping track of the most general one.
6150 for (const std::string &rCode : *rCodes) {
6151 ConstraintWeight weight =
6152 getSingleConstraintMatchWeight(info, rCode.c_str());
6153 if (weight > BestWeight)
6154 BestWeight = weight;
6155 }
6156
6157 return BestWeight;
6158}
6159
6160/// Examine constraint type and operand type and determine a weight value.
6161/// This object must already have been set up with the operand type
6162/// and the current alternative constraint selected.
6163TargetLowering::ConstraintWeight
6164 TargetLowering::getSingleConstraintMatchWeight(
 6165 AsmOperandInfo &info, const char *constraint) const {
 6166 ConstraintWeight weight = CW_Invalid;
6167 Value *CallOperandVal = info.CallOperandVal;
6168 // If we don't have a value, we can't do a match,
6169 // but allow it at the lowest weight.
6170 if (!CallOperandVal)
6171 return CW_Default;
6172 // Look at the constraint type.
6173 switch (*constraint) {
6174 case 'i': // immediate integer.
6175 case 'n': // immediate integer with a known value.
6176 if (isa<ConstantInt>(CallOperandVal))
6177 weight = CW_Constant;
6178 break;
 6179 case 's': // non-explicit integral immediate.
6180 if (isa<GlobalValue>(CallOperandVal))
6181 weight = CW_Constant;
6182 break;
6183 case 'E': // immediate float if host format.
6184 case 'F': // immediate float.
6185 if (isa<ConstantFP>(CallOperandVal))
6186 weight = CW_Constant;
6187 break;
6188 case '<': // memory operand with autodecrement.
6189 case '>': // memory operand with autoincrement.
6190 case 'm': // memory operand.
6191 case 'o': // offsettable memory operand
6192 case 'V': // non-offsettable memory operand
6193 weight = CW_Memory;
6194 break;
6195 case 'r': // general register.
6196 case 'g': // general register, memory operand or immediate integer.
6197 // note: Clang converts "g" to "imr".
6198 if (CallOperandVal->getType()->isIntegerTy())
6199 weight = CW_Register;
6200 break;
6201 case 'X': // any operand.
6202 default:
6203 weight = CW_Default;
6204 break;
6205 }
6206 return weight;
6207}
6208
6209/// If there are multiple different constraints that we could pick for this
6210/// operand (e.g. "imr") try to pick the 'best' one.
6211/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6212/// into seven classes:
6213/// Register -> one specific register
6214/// RegisterClass -> a group of regs
6215/// Memory -> memory
6216/// Address -> a symbolic memory reference
6217/// Immediate -> immediate values
6218/// Other -> magic values (such as "Flag Output Operands")
6219/// Unknown -> something we don't recognize yet and can't handle
6220/// Ideally, we would pick the most specific constraint possible: if we have
6221/// something that fits into a register, we would pick it. The problem here
6222/// is that if we have something that could either be in a register or in
6223/// memory that use of the register could cause selection of *other*
6224/// operands to fail: they might only succeed if we pick memory. Because of
6225/// this the heuristic we use is:
6226///
6227/// 1) If there is an 'other' constraint, and if the operand is valid for
6228/// that constraint, use it. This makes us take advantage of 'i'
6229/// constraints when available.
6230/// 2) Otherwise, pick the most general constraint present. This prefers
6231/// 'm' over 'r', for example.
6232///
6233TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
 6234 TargetLowering::AsmOperandInfo &OpInfo) const {
6235 ConstraintGroup Ret;
6236
6237 Ret.reserve(OpInfo.Codes.size());
6238 for (StringRef Code : OpInfo.Codes) {
 6239 TargetLowering::ConstraintType CType = getConstraintType(Code);
6240
6241 // Indirect 'other' or 'immediate' constraints are not allowed.
6242 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6243 CType == TargetLowering::C_Register ||
 6244 CType == TargetLowering::C_RegisterClass))
 6245 continue;
6246
6247 // Things with matching constraints can only be registers, per gcc
6248 // documentation. This mainly affects "g" constraints.
6249 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6250 continue;
6251
6252 Ret.emplace_back(Code, CType);
6253 }
6254
 6255 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
 6256 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6257 });
6258
6259 return Ret;
6260}
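// Editorial example, not from the source: for the multi-letter constraint
// "imr", the loop above yields pairs for 'i' (C_Other), 'm' (C_Memory) and
// 'r' (C_RegisterClass); the stable sort by priority (4, 3, 2) tries the
// immediate-like option first, implementing heuristic (1) above.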
6261
6262/// If we have an immediate, see if we can lower it. Return true if we can,
6263/// false otherwise.
6264static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
 6265 SDValue Op, SelectionDAG *DAG,
6266 const TargetLowering &TLI) {
6267
6268 assert((P.second == TargetLowering::C_Other ||
6269 P.second == TargetLowering::C_Immediate) &&
6270 "need immediate or other");
6271
6272 if (!Op.getNode())
6273 return false;
6274
6275 std::vector<SDValue> ResultOps;
6276 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6277 return !ResultOps.empty();
6278}
6279
6280/// Determines the constraint code and constraint type to use for the specific
6281/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6282void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
 6283 SDValue Op,
6284 SelectionDAG *DAG) const {
6285 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6286
6287 // Single-letter constraints ('r') are very common.
6288 if (OpInfo.Codes.size() == 1) {
6289 OpInfo.ConstraintCode = OpInfo.Codes[0];
6290 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6291 } else {
 6292 ConstraintGroup G = getConstraintPreferences(OpInfo);
 6293 if (G.empty())
6294 return;
6295
6296 unsigned BestIdx = 0;
6297 for (const unsigned E = G.size();
6298 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6299 G[BestIdx].second == TargetLowering::C_Immediate);
6300 ++BestIdx) {
6301 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6302 break;
6303 // If we're out of constraints, just pick the first one.
6304 if (BestIdx + 1 == E) {
6305 BestIdx = 0;
6306 break;
6307 }
6308 }
6309
6310 OpInfo.ConstraintCode = G[BestIdx].first;
6311 OpInfo.ConstraintType = G[BestIdx].second;
6312 }
6313
6314 // 'X' matches anything.
6315 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6316 // Constants are handled elsewhere. For Functions, the type here is the
6317 // type of the result, which is not what we want to look at; leave them
6318 // alone.
6319 Value *v = OpInfo.CallOperandVal;
6320 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6321 return;
6322 }
6323
6324 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6325 OpInfo.ConstraintCode = "i";
6326 return;
6327 }
6328
6329 // Otherwise, try to resolve it to something we know about by looking at
6330 // the actual operand type.
6331 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6332 OpInfo.ConstraintCode = Repl;
6333 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6334 }
6335 }
6336}
6337
6338/// Given an exact SDIV by a constant, create a multiplication
6339/// with the multiplicative inverse of the constant.
6340/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6341static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
 6342 const SDLoc &dl, SelectionDAG &DAG,
6343 SmallVectorImpl<SDNode *> &Created) {
6344 SDValue Op0 = N->getOperand(0);
6345 SDValue Op1 = N->getOperand(1);
6346 EVT VT = N->getValueType(0);
6347 EVT SVT = VT.getScalarType();
6348 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6349 EVT ShSVT = ShVT.getScalarType();
6350
6351 bool UseSRA = false;
6352 SmallVector<SDValue, 16> Shifts, Factors;
6353
6354 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6355 if (C->isZero())
6356 return false;
6357 APInt Divisor = C->getAPIntValue();
6358 unsigned Shift = Divisor.countr_zero();
6359 if (Shift) {
6360 Divisor.ashrInPlace(Shift);
6361 UseSRA = true;
6362 }
6363 APInt Factor = Divisor.multiplicativeInverse();
6364 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6365 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6366 return true;
6367 };
6368
6369 // Collect all magic values from the build vector.
6370 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6371 return SDValue();
6372
6373 SDValue Shift, Factor;
6374 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6375 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6376 Factor = DAG.getBuildVector(VT, dl, Factors);
6377 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6378 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6379 "Expected matchUnaryPredicate to return one element for scalable "
6380 "vectors");
6381 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6382 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6383 } else {
6384 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6385 Shift = Shifts[0];
6386 Factor = Factors[0];
6387 }
6388
6389 SDValue Res = Op0;
6390 if (UseSRA) {
6391 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6392 Created.push_back(Res.getNode());
6393 }
6394
6395 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6396}
6397
6398/// Given an exact UDIV by a constant, create a multiplication
6399/// with the multiplicative inverse of the constant.
6400/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
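/// For illustration, with i32 and an exact udiv by 10 = 5 * 2^1: shift right
/// by 1, then multiply by inv(5) mod 2^32 = 0xCCCCCCCD (since 5 * 0xCCCCCCCD
/// == 2^34 + 1 == 1 (mod 2^32)). E.g. X = 70: (70 >> 1) * 0xCCCCCCCD ==
/// 35 * 0xCCCCCCCD == 7 (mod 2^32), which is 70 / 10.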
6401static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6402 const SDLoc &dl, SelectionDAG &DAG,
6403 SmallVectorImpl<SDNode *> &Created) {
6404 EVT VT = N->getValueType(0);
6405 EVT SVT = VT.getScalarType();
6406 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6407 EVT ShSVT = ShVT.getScalarType();
6408
6409 bool UseSRL = false;
6410 SmallVector<SDValue, 16> Shifts, Factors;
6411
6412 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6413 if (C->isZero())
6414 return false;
6415 APInt Divisor = C->getAPIntValue();
6416 unsigned Shift = Divisor.countr_zero();
6417 if (Shift) {
6418 Divisor.lshrInPlace(Shift);
6419 UseSRL = true;
6420 }
6421 // Calculate the multiplicative inverse modulo BW.
6422 APInt Factor = Divisor.multiplicativeInverse();
6423 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6424 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6425 return true;
6426 };
6427
6428 SDValue Op1 = N->getOperand(1);
6429
6430 // Collect all magic values from the build vector.
6431 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6432 return SDValue();
6433
6434 SDValue Shift, Factor;
6435 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6436 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6437 Factor = DAG.getBuildVector(VT, dl, Factors);
6438 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6439 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6440 "Expected matchUnaryPredicate to return one element for scalable "
6441 "vectors");
6442 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6443 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6444 } else {
6445 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6446 Shift = Shifts[0];
6447 Factor = Factors[0];
6448 }
6449
6450 SDValue Res = N->getOperand(0);
6451 if (UseSRL) {
6452 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6453 Created.push_back(Res.getNode());
6454 }
6455
6456 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6457}
6458
6459SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6460 SelectionDAG &DAG,
6461 SmallVectorImpl<SDNode *> &Created) const {
6462 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6463 if (isIntDivCheap(N->getValueType(0), Attr))
6464 return SDValue(N, 0); // Lower SDIV as SDIV
6465 return SDValue();
6466}
6467
6468SDValue
6469TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6470 SelectionDAG &DAG,
6471 SmallVectorImpl<SDNode *> &Created) const {
6472 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6473 if (isIntDivCheap(N->getValueType(0), Attr))
6474 return SDValue(N, 0); // Lower SREM as SREM
6475 return SDValue();
6476}
6477
6478/// Build sdiv by power-of-2 with conditional move instructions
6479/// Ref: "Hacker's Delight" by Henry Warren 10-1
6480/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6481/// bgez x, label
6482/// add x, x, 2**k-1
6483/// label:
6484/// sra res, x, k
6485/// neg res, res (when the divisor is negative)
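/// For example, sdiv x, 8 lowers (with k = 3 and 2**k-1 = 7) to:
///   cmp  = setlt x, 0
///   add  = add x, 7
///   cmov = select cmp, add, x
///   res  = sra cmov, 3
/// and sdiv x, -8 additionally negates: res = sub 0, res.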
6486SDValue TargetLowering::buildSDIVPow2WithCMov(
6487 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6488 SmallVectorImpl<SDNode *> &Created) const {
6489 unsigned Lg2 = Divisor.countr_zero();
6490 EVT VT = N->getValueType(0);
6491
6492 SDLoc DL(N);
6493 SDValue N0 = N->getOperand(0);
6494 SDValue Zero = DAG.getConstant(0, DL, VT);
6495 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6496 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6497
6498 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6499 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6500 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6501 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6502 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6503
6504 Created.push_back(Cmp.getNode());
6505 Created.push_back(Add.getNode());
6506 Created.push_back(CMov.getNode());
6507
6508 // Divide by pow2.
6509 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6510 DAG.getShiftAmountConstant(Lg2, VT, DL));
6511
6512 // If we're dividing by a positive value, we're done. Otherwise, we must
6513 // negate the result.
6514 if (Divisor.isNonNegative())
6515 return SRA;
6516
6517 Created.push_back(SRA.getNode());
6518 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6519}
6520
6521/// Given an ISD::SDIV node expressing a divide by constant,
6522/// return a DAG expression to select that will generate the same value by
6523/// multiplying by a magic number.
6524/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
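/// For example, for i32 sdiv by 7 the magic constant is 0x92492493 with a
/// shift of 2; because the magic value is negative while the divisor is
/// positive, the numerator is added back after the high multiply:
///   q = mulhs(n, 0x92492493); q = q + n; q = sra(q, 2); q = q + srl(q, 31)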
6525SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6526 bool IsAfterLegalization,
6527 bool IsAfterLegalTypes,
6528 SmallVectorImpl<SDNode *> &Created) const {
6529 SDLoc dl(N);
6530 EVT VT = N->getValueType(0);
6531 EVT SVT = VT.getScalarType();
6532 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6533 EVT ShSVT = ShVT.getScalarType();
6534 unsigned EltBits = VT.getScalarSizeInBits();
6535 EVT MulVT;
6536
6537 // Check to see if we can do this.
6538 // FIXME: We should be more aggressive here.
6539 if (!isTypeLegal(VT)) {
6540 // Limit this to simple scalars for now.
6541 if (VT.isVector() || !VT.isSimple())
6542 return SDValue();
6543
6544 // If this type will be promoted to a large enough type with a legal
6545 // multiply operation, we can go ahead and do this transform.
6546 if (getTypeAction(*DAG.getContext(), VT) != TypePromoteInteger)
6547 return SDValue();
6548
6549 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6550 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6551 !isOperationLegal(ISD::MUL, MulVT))
6552 return SDValue();
6553 }
6554
6555 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6556 if (N->getFlags().hasExact())
6557 return BuildExactSDIV(*this, N, dl, DAG, Created);
6558
6559 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6560
6561 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6562 if (C->isZero())
6563 return false;
6564
6565 const APInt &Divisor = C->getAPIntValue();
6566 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6567 int NumeratorFactor = 0;
6568 int ShiftMask = -1;
6569
6570 if (Divisor.isOne() || Divisor.isAllOnes()) {
6571 // If d is +1/-1, we just multiply the numerator by +1/-1.
6572 NumeratorFactor = Divisor.getSExtValue();
6573 magics.Magic = 0;
6574 magics.ShiftAmount = 0;
6575 ShiftMask = 0;
6576 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6577 // If d > 0 and m < 0, add the numerator.
6578 NumeratorFactor = 1;
6579 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6580 // If d < 0 and m > 0, subtract the numerator.
6581 NumeratorFactor = -1;
6582 }
6583
6584 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6585 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6586 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6587 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6588 return true;
6589 };
6590
6591 SDValue N0 = N->getOperand(0);
6592 SDValue N1 = N->getOperand(1);
6593
6594 // Collect the shifts / magic values from each element.
6595 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6596 return SDValue();
6597
6598 SDValue MagicFactor, Factor, Shift, ShiftMask;
6599 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6600 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6601 Factor = DAG.getBuildVector(VT, dl, Factors);
6602 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6603 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6604 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6605 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6606 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6607 "Expected matchUnaryPredicate to return one element for scalable "
6608 "vectors");
6609 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6610 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6611 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6612 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6613 } else {
6614 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6615 MagicFactor = MagicFactors[0];
6616 Factor = Factors[0];
6617 Shift = Shifts[0];
6618 ShiftMask = ShiftMasks[0];
6619 }
6620
6621 // Multiply the numerator (operand 0) by the magic value.
6622 // FIXME: We should support doing a MUL in a wider type.
6623 auto GetMULHS = [&](SDValue X, SDValue Y) {
6624 // If the type isn't legal, use a wider mul of the type calculated
6625 // earlier.
6626 if (!isTypeLegal(VT)) {
6627 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6628 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6629 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6630 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6631 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6632 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6633 }
6634
6635 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6636 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6637 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6638 SDValue LoHi =
6639 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6640 return SDValue(LoHi.getNode(), 1);
6641 }
6642 // If a type twice as wide is legal, widen and use a mul plus a shift.
6643 unsigned Size = VT.getScalarSizeInBits();
6644 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6645 if (VT.isVector())
6646 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6647 VT.getVectorElementCount());
6648 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6649 // custom lowered. This is very expensive so avoid it at all costs for
6650 // constant divisors.
6651 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6652 isOperationLegalOrCustom(ISD::MUL, WideVT)) ||
6653 isOperationLegal(ISD::MUL, WideVT)) {
6654 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6655 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6656 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6657 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6658 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6659 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6660 }
6661 return SDValue();
6662 };
6663
6664 SDValue Q = GetMULHS(N0, MagicFactor);
6665 if (!Q)
6666 return SDValue();
6667
6668 Created.push_back(Q.getNode());
6669
6670 // (Optionally) Add/subtract the numerator using Factor.
6671 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6672 Created.push_back(Factor.getNode());
6673 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6674 Created.push_back(Q.getNode());
6675
6676 // Shift right algebraic by shift value.
6677 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6678 Created.push_back(Q.getNode());
6679
6680 // Extract the sign bit, mask it and add it to the quotient.
6681 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6682 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6683 Created.push_back(T.getNode());
6684 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6685 Created.push_back(T.getNode());
6686 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6687}
6688
6689/// Given an ISD::UDIV node expressing a divide by constant,
6690/// return a DAG expression to select that will generate the same value by
6691/// multiplying by a magic number.
6692/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
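/// For example, for i32 udiv by 7 the magic constant is 0x24924925 and the
/// fixup ('NPQ') path is needed:
///   q = mulhu(n, 0x24924925); t = srl(sub(n, q), 1); q = srl(add(t, q), 2)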
6693SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6694 bool IsAfterLegalization,
6695 bool IsAfterLegalTypes,
6696 SmallVectorImpl<SDNode *> &Created) const {
6697 SDLoc dl(N);
6698 EVT VT = N->getValueType(0);
6699 EVT SVT = VT.getScalarType();
6700 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6701 EVT ShSVT = ShVT.getScalarType();
6702 unsigned EltBits = VT.getScalarSizeInBits();
6703 EVT MulVT;
6704
6705 // Check to see if we can do this.
6706 // FIXME: We should be more aggressive here.
6707 if (!isTypeLegal(VT)) {
6708 // Limit this to simple scalars for now.
6709 if (VT.isVector() || !VT.isSimple())
6710 return SDValue();
6711
6712 // If this type will be promoted to a large enough type with a legal
6713 // multiply operation, we can go ahead and do this transform.
6714 if (getTypeAction(*DAG.getContext(), VT) != TypePromoteInteger)
6715 return SDValue();
6716
6717 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6718 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6719 !isOperationLegal(ISD::MUL, MulVT))
6720 return SDValue();
6721 }
6722
6723 // If the udiv has an 'exact' bit we can use a simpler lowering.
6724 if (N->getFlags().hasExact())
6725 return BuildExactUDIV(*this, N, dl, DAG, Created);
6726
6727 SDValue N0 = N->getOperand(0);
6728 SDValue N1 = N->getOperand(1);
6729
6730 // Try to use leading zeros of the dividend to reduce the multiplier and
6731 // avoid expensive fixups.
6732 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6733
6734 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6735 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6736
6737 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6738 if (C->isZero())
6739 return false;
6740 const APInt &Divisor = C->getAPIntValue();
6741
6742 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6743
6744 // Magic algorithm doesn't work for division by 1. We need to emit a select
6745 // at the end.
6746 if (Divisor.isOne()) {
6747 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6748 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6749 } else {
6750 UnsignedDivisionByConstantInfo magics =
6751 UnsignedDivisionByConstantInfo::get(
6752 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6753
6754 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6755
6756 assert(magics.PreShift < Divisor.getBitWidth() &&
6757 "We shouldn't generate an undefined shift!");
6758 assert(magics.PostShift < Divisor.getBitWidth() &&
6759 "We shouldn't generate an undefined shift!");
6760 assert((!magics.IsAdd || magics.PreShift == 0) &&
6761 "Unexpected pre-shift");
6762 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6763 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6764 NPQFactor = DAG.getConstant(
6765 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6766 : APInt::getZero(EltBits),
6767 dl, SVT);
6768 UseNPQ |= magics.IsAdd;
6769 UsePreShift |= magics.PreShift != 0;
6770 UsePostShift |= magics.PostShift != 0;
6771 }
6772
6773 PreShifts.push_back(PreShift);
6774 MagicFactors.push_back(MagicFactor);
6775 NPQFactors.push_back(NPQFactor);
6776 PostShifts.push_back(PostShift);
6777 return true;
6778 };
6779
6780 // Collect the shifts/magic values from each element.
6781 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6782 return SDValue();
6783
6784 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6785 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6786 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6787 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6788 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6789 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6790 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6791 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6792 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6793 "Expected matchUnaryPredicate to return one for scalable vectors");
6794 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6795 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6796 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6797 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6798 } else {
6799 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6800 PreShift = PreShifts[0];
6801 MagicFactor = MagicFactors[0];
6802 PostShift = PostShifts[0];
6803 }
6804
6805 SDValue Q = N0;
6806 if (UsePreShift) {
6807 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6808 Created.push_back(Q.getNode());
6809 }
6810
6811 // FIXME: We should support doing a MUL in a wider type.
6812 auto GetMULHU = [&](SDValue X, SDValue Y) {
6813 // If the type isn't legal, use a wider mul of the type calculated
6814 // earlier.
6815 if (!isTypeLegal(VT)) {
6816 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6817 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6818 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6819 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6820 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6821 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6822 }
6823
6824 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6825 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6826 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6827 SDValue LoHi =
6828 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6829 return SDValue(LoHi.getNode(), 1);
6830 }
6831 // If a type twice as wide is legal, widen and use a mul plus a shift.
6832 unsigned Size = VT.getScalarSizeInBits();
6833 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6834 if (VT.isVector())
6835 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6836 VT.getVectorElementCount());
6837 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6838 // custom lowered. This is very expensive so avoid it at all costs for
6839 // constant divisors.
6840 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6841 isOperationLegalOrCustom(ISD::MUL, WideVT)) ||
6842 isOperationLegal(ISD::MUL, WideVT)) {
6843 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6844 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6845 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6846 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6847 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6848 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6849 }
6850 return SDValue(); // No mulhu or equivalent
6851 };
6852
6853 // Multiply the numerator (operand 0) by the magic value.
6854 Q = GetMULHU(Q, MagicFactor);
6855 if (!Q)
6856 return SDValue();
6857
6858 Created.push_back(Q.getNode());
6859
6860 if (UseNPQ) {
6861 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6862 Created.push_back(NPQ.getNode());
6863
6864 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6865 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6866 if (VT.isVector())
6867 NPQ = GetMULHU(NPQ, NPQFactor);
6868 else
6869 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6870
6871 Created.push_back(NPQ.getNode());
6872
6873 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6874 Created.push_back(Q.getNode());
6875 }
6876
6877 if (UsePostShift) {
6878 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6879 Created.push_back(Q.getNode());
6880 }
6881
6882 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6883
6884 SDValue One = DAG.getConstant(1, dl, VT);
6885 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6886 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6887}
6888
6889/// If all values in Values that *don't* match the predicate are the same
6890/// 'splat' value, then replace all values with that splat value.
6891/// Else, if AlternativeReplacement was provided, then replace all values that
6892/// do match predicate with AlternativeReplacement value.
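/// For example, with Values = {X, 0, X} and Predicate = isNullConstant, the
/// only non-matching value is X, so the result is {X, X, X}. With
/// Values = {X, 0, Y} no such splat exists, and the zero is replaced by
/// AlternativeReplacement instead (if one was provided).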
6893static void
6894turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6895 std::function<bool(SDValue)> Predicate,
6896 SDValue AlternativeReplacement = SDValue()) {
6897 SDValue Replacement;
6898 // Is there a value for which the Predicate does *NOT* match? What is it?
6899 auto SplatValue = llvm::find_if_not(Values, Predicate);
6900 if (SplatValue != Values.end()) {
6901 // Does Values consist only of SplatValue's and values matching Predicate?
6902 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6903 return Value == *SplatValue || Predicate(Value);
6904 })) // Then we shall replace values matching predicate with SplatValue.
6905 Replacement = *SplatValue;
6906 }
6907 if (!Replacement) {
6908 // Oops, we did not find the "baseline" splat value.
6909 if (!AlternativeReplacement)
6910 return; // Nothing to do.
6911 // Let's replace with provided value then.
6912 Replacement = AlternativeReplacement;
6913 }
6914 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6915}
6916
6917/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6918/// where the divisor is constant and the comparison target is zero,
6919/// return a DAG expression that will generate the same comparison result
6920/// using only multiplications, additions and shifts/rotations.
6921/// Ref: "Hacker's Delight" 10-17.
6922SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6923 SDValue CompTargetNode,
6924 ISD::CondCode Cond,
6925 DAGCombinerInfo &DCI,
6926 const SDLoc &DL) const {
6927 SmallVector<SDNode *, 16> Built;
6928 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6929 DCI, DL, Built)) {
6930 for (SDNode *N : Built)
6931 DCI.AddToWorklist(N);
6932 return Folded;
6933 }
6934
6935 return SDValue();
6936}
6937
6938SDValue
6939TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6940 SDValue CompTargetNode, ISD::CondCode Cond,
6941 DAGCombinerInfo &DCI, const SDLoc &DL,
6942 SmallVectorImpl<SDNode *> &Created) const {
6943 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6944 // - D must be constant, with D = D0 * 2^K where D0 is odd
6945 // - P is the multiplicative inverse of D0 modulo 2^W
6946 // - Q = floor(((2^W) - 1) / D)
6947 // where W is the width of the common type of N and D.
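 // For example, for i32 and D = 6 = 3 * 2^1: K = 1, P = inv(3) mod 2^32 =
 // 0xAAAAAAAB, and Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA. For N = 12,
 // rotr(12 * P, 1) = rotr(4, 1) = 2 u<= Q, so the fold yields true; for
 // N = 13 the product is odd, the rotate moves a one into the sign bit, and
 // the comparison fails.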
6948 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6949 "Only applicable for (in)equality comparisons.");
6950
6951 SelectionDAG &DAG = DCI.DAG;
6952
6953 EVT VT = REMNode.getValueType();
6954 EVT SVT = VT.getScalarType();
6955 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6956 EVT ShSVT = ShVT.getScalarType();
6957
6958 // If MUL is unavailable, we cannot proceed in any case.
6959 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6960 return SDValue();
6961
6962 bool ComparingWithAllZeros = true;
6963 bool AllComparisonsWithNonZerosAreTautological = true;
6964 bool HadTautologicalLanes = false;
6965 bool AllLanesAreTautological = true;
6966 bool HadEvenDivisor = false;
6967 bool AllDivisorsArePowerOfTwo = true;
6968 bool HadTautologicalInvertedLanes = false;
6969 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
6970
6971 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6972 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6973 if (CDiv->isZero())
6974 return false;
6975
6976 const APInt &D = CDiv->getAPIntValue();
6977 const APInt &Cmp = CCmp->getAPIntValue();
6978
6979 ComparingWithAllZeros &= Cmp.isZero();
6980
6981 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6982 // if C2 is not less than C1, the comparison is always false.
6983 // But we will only be able to produce the comparison that will give the
6984 // opposite tautological answer. So this lane would need to be fixed up.
6985 bool TautologicalInvertedLane = D.ule(Cmp);
6986 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6987
6988 // If all lanes are tautological (either all divisors are ones, or divisor
6989 // is not greater than the constant we are comparing with),
6990 // we will prefer to avoid the fold.
6991 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6992 HadTautologicalLanes |= TautologicalLane;
6993 AllLanesAreTautological &= TautologicalLane;
6994
6995 // If we are comparing with non-zero, we'll need to subtract said
6996 // comparison value from the LHS. But there is no point in doing that if
6997 // every lane where we are comparing with non-zero is tautological.
6998 if (!Cmp.isZero())
6999 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7000
7001 // Decompose D into D0 * 2^K
7002 unsigned K = D.countr_zero();
7003 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7004 APInt D0 = D.lshr(K);
7005
7006 // D is even if it has trailing zeros.
7007 HadEvenDivisor |= (K != 0);
7008 // D is a power-of-two if D0 is one.
7009 // If all divisors are power-of-two, we will prefer to avoid the fold.
7010 AllDivisorsArePowerOfTwo &= D0.isOne();
7011
7012 // P = inv(D0, 2^W)
7013 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7014 unsigned W = D.getBitWidth();
7015 APInt P = D0.multiplicativeInverse();
7016 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7017
7018 // Q = floor((2^W - 1) u/ D)
7019 // R = ((2^W - 1) u% D)
7020 APInt Q, R;
7021 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7022
7023 // If we are comparing with zero, then that comparison constant is okay,
7024 // else it may need to be one less than that.
7025 if (Cmp.ugt(R))
7026 Q -= 1;
7027
7029 "We are expecting that K is always less than all-ones for ShSVT");
7030
7031 // If the lane is tautological the result can be constant-folded.
7032 if (TautologicalLane) {
7033 // Set the P and K amounts to bogus values so we can try to splat them.
7034 P = 0;
7035 K = -1;
7036 // And ensure that comparison constant is tautological,
7037 // it will always compare true/false.
7038 Q = -1;
7039 }
7040
7041 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7042 KAmts.push_back(
7043 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7044 /*implicitTrunc=*/true),
7045 DL, ShSVT));
7046 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7047 return true;
7048 };
7049
7050 SDValue N = REMNode.getOperand(0);
7051 SDValue D = REMNode.getOperand(1);
7052
7053 // Collect the values from each element.
7054 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7055 return SDValue();
7056
7057 // If all lanes are tautological, the result can be constant-folded.
7058 if (AllLanesAreTautological)
7059 return SDValue();
7060
7061 // If this is a urem by a power-of-two, avoid the fold since it can be
7062 // best implemented as a bit test.
7063 if (AllDivisorsArePowerOfTwo)
7064 return SDValue();
7065
7066 SDValue PVal, KVal, QVal;
7067 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7068 if (HadTautologicalLanes) {
7069 // Try to turn PAmts into a splat, since we don't care about the values
7070 // that are currently '0'. If we can't, just keep '0'`s.
7071 turnVectorIntoSplatVector(PAmts, isNullConstant);
7072 // Try to turn KAmts into a splat, since we don't care about the values
7073 // that are currently '-1'. If we can't, change them to '0'`s.
7074 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7075 DAG.getConstant(0, DL, ShSVT));
7076 }
7077
7078 PVal = DAG.getBuildVector(VT, DL, PAmts);
7079 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7080 QVal = DAG.getBuildVector(VT, DL, QAmts);
7081 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7082 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7083 "Expected matchBinaryPredicate to return one element for "
7084 "SPLAT_VECTORs");
7085 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7086 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7087 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7088 } else {
7089 PVal = PAmts[0];
7090 KVal = KAmts[0];
7091 QVal = QAmts[0];
7092 }
7093
7094 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7095 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7096 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7097 assert(CompTargetNode.getValueType() == N.getValueType() &&
7098 "Expecting that the types on LHS and RHS of comparisons match.");
7099 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7100 }
7101
7102 // (mul N, P)
7103 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7104 Created.push_back(Op0.getNode());
7105
7106 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7107 // divisors as a performance improvement, since rotating by 0 is a no-op.
7108 if (HadEvenDivisor) {
7109 // We need ROTR to do this.
7110 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7111 return SDValue();
7112 // UREM: (rotr (mul N, P), K)
7113 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7114 Created.push_back(Op0.getNode());
7115 }
7116
7117 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7118 SDValue NewCC =
7119 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7120 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7121 if (!HadTautologicalInvertedLanes)
7122 return NewCC;
7123
7124 // If any lanes previously compared always-false, the NewCC will give
7125 // always-true result for them, so we need to fixup those lanes.
7126 // Or the other way around for inequality predicate.
7127 assert(VT.isVector() && "Can/should only get here for vectors.");
7128 Created.push_back(NewCC.getNode());
7129
7130 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7131 // if C2 is not less than C1, the comparison is always false.
7132 // But we have produced the comparison that will give the
7133 // opposite tautological answer. So these lanes would need to be fixed up.
7134 SDValue TautologicalInvertedChannels =
7135 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7136 Created.push_back(TautologicalInvertedChannels.getNode());
7137
7138 // NOTE: we avoid letting illegal types through even if we're before legalize
7139 // ops; legalization has a hard time producing good code for this.
7140 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7141 // If we have a vector select, let's replace the comparison results in the
7142 // affected lanes with the correct tautological result.
7143 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7144 DL, SETCCVT, SETCCVT);
7145 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7146 Replacement, NewCC);
7147 }
7148
7149 // Else, we can just invert the comparison result in the appropriate lanes.
7150 //
7151 // NOTE: see the VSELECT note above.
7152 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7153 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7154 TautologicalInvertedChannels);
7155
7156 return SDValue(); // Don't know how to lower.
7157}
7158
7159/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7160/// where the divisor is constant and the comparison target is zero,
7161/// return a DAG expression that will generate the same comparison result
7162/// using only multiplications, additions and shifts/rotations.
7163/// Ref: "Hacker's Delight" 10-17.
7164SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7165 SDValue CompTargetNode,
7166 ISD::CondCode Cond,
7167 DAGCombinerInfo &DCI,
7168 const SDLoc &DL) const {
7169 SmallVector<SDNode *, 16> Built;
7170 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7171 DCI, DL, Built)) {
7172 assert(Built.size() <= 7 && "Max size prediction failed.");
7173 for (SDNode *N : Built)
7174 DCI.AddToWorklist(N);
7175 return Folded;
7176 }
7177
7178 return SDValue();
7179}
7180
7181SDValue
7182TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7183 SDValue CompTargetNode, ISD::CondCode Cond,
7184 DAGCombinerInfo &DCI, const SDLoc &DL,
7185 SmallVectorImpl<SDNode *> &Created) const {
7186 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7187 // Fold:
7188 // (seteq/ne (srem N, D), 0)
7189 // To:
7190 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7191 //
7192 // - D must be constant, with D = D0 * 2^K where D0 is odd
7193 // - P is the multiplicative inverse of D0 modulo 2^W
7194 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7195 // - Q = floor((2 * A) / (2^K))
7196 // where W is the width of the common type of N and D.
7197 //
7198 // When D is a power of two (and thus D0 is 1), the normal
7199 // formula for A and Q don't apply, because the derivation
7200 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7201 // does not apply. This specifically fails when N = INT_MIN.
7202 //
7203 // Instead, for power-of-two D, we use:
7204 // - A = 2^(W-1)
7205 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7206 // - Q = 2^(W-K) - 1
7207 // |-> Test that the top K bits are zero after rotation
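 // For example, for i32 and D = 6: D0 = 3, K = 1, P = 0xAAAAAAAB,
 // A = floor((2^31 - 1) / 3) with the low bit cleared = 0x2AAAAAAA, and
 // Q = floor((2 * A) / 2) = 0x2AAAAAAA. For N = -12: -12 * P + A ==
 // 0x2AAAAAA6 (mod 2^32), and rotr(0x2AAAAAA6, 1) = 0x15555553 u<= Q, so
 // (srem -12, 6) == 0 correctly folds to true.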
7208 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7209 "Only applicable for (in)equality comparisons.");
7210
7211 SelectionDAG &DAG = DCI.DAG;
7212
7213 EVT VT = REMNode.getValueType();
7214 EVT SVT = VT.getScalarType();
7215 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7216 EVT ShSVT = ShVT.getScalarType();
7217
7218 // If we are after ops legalization, and MUL is unavailable, we cannot
7219 // proceed.
7220 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7221 return SDValue();
7222
7223 // TODO: Could support comparing with non-zero too.
7224 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7225 if (!CompTarget || !CompTarget->isZero())
7226 return SDValue();
7227
7228 bool HadIntMinDivisor = false;
7229 bool HadOneDivisor = false;
7230 bool AllDivisorsAreOnes = true;
7231 bool HadEvenDivisor = false;
7232 bool NeedToApplyOffset = false;
7233 bool AllDivisorsArePowerOfTwo = true;
7234 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7235
7236 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7237 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7238 if (C->isZero())
7239 return false;
7240
7241 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7242
7243 // WARNING: this fold is only valid for positive divisors!
7244 APInt D = C->getAPIntValue();
7245 if (D.isNegative())
7246 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7247
7248 HadIntMinDivisor |= D.isMinSignedValue();
7249
7250 // If all divisors are ones, we will prefer to avoid the fold.
7251 HadOneDivisor |= D.isOne();
7252 AllDivisorsAreOnes &= D.isOne();
7253
7254 // Decompose D into D0 * 2^K
7255 unsigned K = D.countr_zero();
7256 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7257 APInt D0 = D.lshr(K);
7258
7259 if (!D.isMinSignedValue()) {
7260 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7261 // we don't care about this lane in this fold, we'll special-handle it.
7262 HadEvenDivisor |= (K != 0);
7263 }
7264
7265 // D is a power-of-two if D0 is one. This includes INT_MIN.
7266 // If all divisors are power-of-two, we will prefer to avoid the fold.
7267 AllDivisorsArePowerOfTwo &= D0.isOne();
7268
7269 // P = inv(D0, 2^W)
7270 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7271 unsigned W = D.getBitWidth();
7272 APInt P = D0.multiplicativeInverse();
7273 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7274
7275 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7276 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7277 A.clearLowBits(K);
7278
7279 if (!D.isMinSignedValue()) {
7280 // If divisor INT_MIN, then we don't care about this lane in this fold,
7281 // we'll special-handle it.
7282 NeedToApplyOffset |= A != 0;
7283 }
7284
7285 // Q = floor((2 * A) / (2^K))
7286 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7287
7289 "We are expecting that A is always less than all-ones for SVT");
7291 "We are expecting that K is always less than all-ones for ShSVT");
7292
7293 // If D was a power of two, apply the alternate constant derivation.
7294 if (D0.isOne()) {
7295 // A = 2^(W-1)
7296 A = APInt::getSignedMinValue(W);
7297 // - Q = 2^(W-K) - 1
7298 Q = APInt::getAllOnes(W - K).zext(W);
7299 }
7300
7301 // If the divisor is 1 the result can be constant-folded. Likewise, we
7302 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7303 if (D.isOne()) {
7304 // Set P, A and K to bogus values so we can try to splat them.
7305 P = 0;
7306 A = -1;
7307 K = -1;
7308
7309 // x ?% 1 == 0 <--> true <--> x u<= -1
7310 Q = -1;
7311 }
7312
7313 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7314 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7315 KAmts.push_back(
7316 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7317 /*implicitTrunc=*/true),
7318 DL, ShSVT));
7319 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7320 return true;
7321 };
7322
7323 SDValue N = REMNode.getOperand(0);
7324 SDValue D = REMNode.getOperand(1);
7325
7326 // Collect the values from each element.
7327 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7328 return SDValue();
7329
7330 // If this is a srem by one, avoid the fold since it can be constant-folded.
7331 if (AllDivisorsAreOnes)
7332 return SDValue();
7333
7334 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7335 // since it can be best implemented as a bit test.
7336 if (AllDivisorsArePowerOfTwo)
7337 return SDValue();
7338
7339 SDValue PVal, AVal, KVal, QVal;
7340 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7341 if (HadOneDivisor) {
7342 // Try to turn PAmts into a splat, since we don't care about the values
7343 // that are currently '0'. If we can't, just keep '0'`s.
7344 turnVectorIntoSplatVector(PAmts, isNullConstant);
7345 // Try to turn AAmts into a splat, since we don't care about the
7346 // values that are currently '-1'. If we can't, change them to '0'`s.
7347 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7348 DAG.getConstant(0, DL, SVT));
7349 // Try to turn KAmts into a splat, since we don't care about the values
7350 // that are currently '-1'. If we can't, change them to '0'`s.
7351 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7352 DAG.getConstant(0, DL, ShSVT));
7353 }
7354
7355 PVal = DAG.getBuildVector(VT, DL, PAmts);
7356 AVal = DAG.getBuildVector(VT, DL, AAmts);
7357 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7358 QVal = DAG.getBuildVector(VT, DL, QAmts);
7359 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7360 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7361 QAmts.size() == 1 &&
7362 "Expected matchUnaryPredicate to return one element for scalable "
7363 "vectors");
7364 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7365 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7366 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7367 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7368 } else {
7369 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7370 PVal = PAmts[0];
7371 AVal = AAmts[0];
7372 KVal = KAmts[0];
7373 QVal = QAmts[0];
7374 }
7375
7376 // (mul N, P)
7377 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7378 Created.push_back(Op0.getNode());
7379
7380 if (NeedToApplyOffset) {
7381 // We need ADD to do this.
7382 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7383 return SDValue();
7384
7385 // (add (mul N, P), A)
7386 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7387 Created.push_back(Op0.getNode());
7388 }
7389
7390 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7391 // divisors as a performance improvement, since rotating by 0 is a no-op.
7392 if (HadEvenDivisor) {
7393 // We need ROTR to do this.
7394 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7395 return SDValue();
7396 // SREM: (rotr (add (mul N, P), A), K)
7397 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7398 Created.push_back(Op0.getNode());
7399 }
7400
7401 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7402 SDValue Fold =
7403 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7405
7406 // If we didn't have lanes with INT_MIN divisor, then we're done.
7407 if (!HadIntMinDivisor)
7408 return Fold;
7409
7410 // That fold is only valid for positive divisors. Which effectively means,
7411 // it is invalid for INT_MIN divisors. So if we have such a lane,
7412 // we must fix-up results for said lanes.
7413 assert(VT.isVector() && "Can/should only get here for vectors.");
7414
7415 // NOTE: we avoid letting illegal types through even if we're before legalize
7416 // ops; legalization has a hard time producing good code for the sequence
7417 // that follows.
7418 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7419 !isOperationLegalOrCustom(ISD::AND, VT) ||
7420 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7421 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7422 return SDValue();
7423
7424 Created.push_back(Fold.getNode());
7425
7426 SDValue IntMin = DAG.getConstant(
7427 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7428 SDValue IntMax = DAG.getConstant(
7429 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7430 SDValue Zero =
7431 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7432
7433 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7434 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7435 Created.push_back(DivisorIsIntMin.getNode());
7436
7437 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7438 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7439 Created.push_back(Masked.getNode());
7440 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7441 Created.push_back(MaskedIsZero.getNode());
7442
7443 // To produce final result we need to blend 2 vectors: 'SetCC' and
7444 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7445 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7446 // constant-folded, select can get lowered to a shuffle with constant mask.
7447 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7448 MaskedIsZero, Fold);
7449
7450 return Blended;
7451}
7452
7453SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7454 const DenormalMode &Mode) const {
7455 SDLoc DL(Op);
7456 EVT VT = Op.getValueType();
7457 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7458 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7459
7460 // This is specifically a check for the handling of denormal inputs, not the
7461 // result.
7462 if (Mode.Input == DenormalMode::PreserveSign ||
7463 Mode.Input == DenormalMode::PositiveZero) {
7464 // Test = X == 0.0
7465 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7466 }
7467
7468 // Test for denormal inputs to avoid a wrong estimate.
7469 //
7470 // Test = fabs(X) < SmallestNormal
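 // (For f32 the smallest normalized value is 0x1.0p-126; any input with a
 // smaller magnitude is treated as a denormal by this test.)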
7471 const fltSemantics &FltSem = VT.getFltSemantics();
7472 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7473 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7474 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7475 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7476}
7477
7478SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7479 bool LegalOps, bool OptForSize,
7480 NegatibleCost &Cost,
7481 unsigned Depth) const {
7482 // fneg is removable even if it has multiple uses.
7483 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7484 Cost = NegatibleCost::Cheaper;
7485 return Op.getOperand(0);
7486 }
7487
7488 // Don't recurse exponentially.
7489 if (Depth > SelectionDAG::MaxRecursionDepth)
7490 return SDValue();
7491
7492 // Pre-increment recursion depth for use in recursive calls.
7493 ++Depth;
7494 const SDNodeFlags Flags = Op->getFlags();
7495 const TargetOptions &Options = DAG.getTarget().Options;
7496 EVT VT = Op.getValueType();
7497 unsigned Opcode = Op.getOpcode();
7498
7499 // Don't allow anything with multiple uses unless we know it is free.
7500 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7501 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7502 isFPExtFree(VT, Op.getOperand(0).getValueType());
7503 if (!IsFreeExtend)
7504 return SDValue();
7505 }
7506
7507 auto RemoveDeadNode = [&](SDValue N) {
7508 if (N && N.getNode()->use_empty())
7509 DAG.RemoveDeadNode(N.getNode());
7510 };
7511
7512 SDLoc DL(Op);
7513
7514 // Because getNegatedExpression can delete nodes we need a handle to keep
7515 // temporary nodes alive in case the recursion manages to create an identical
7516 // node.
7517 std::list<HandleSDNode> Handles;
7518
7519 switch (Opcode) {
7520 case ISD::ConstantFP: {
7521 // Don't invert constant FP values after legalization unless the target says
7522 // the negated constant is legal.
7523 bool IsOpLegal =
7524 isOperationLegal(ISD::ConstantFP, VT) ||
7525 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7526 OptForSize);
7527
7528 if (LegalOps && !IsOpLegal)
7529 break;
7530
7531 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7532 V.changeSign();
7533 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7534
7535 // If we already have the use of the negated floating constant, it is free
7536 // to negate it even if it has multiple uses.
7537 if (!Op.hasOneUse() && CFP.use_empty())
7538 break;
7539 Cost = NegatibleCost::Neutral;
7540 return CFP;
7541 }
7542 case ISD::BUILD_VECTOR: {
7543 // Only permit BUILD_VECTOR of constants.
7544 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7545 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7546 }))
7547 break;
7548
7549 bool IsOpLegal =
7550 (isOperationLegal(ISD::ConstantFP, VT) &&
7551 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7552 llvm::all_of(Op->op_values(), [&](SDValue N) {
7553 return N.isUndef() ||
7554 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7555 OptForSize);
7556 });
7557
7558 if (LegalOps && !IsOpLegal)
7559 break;
7560
7561 SmallVector<SDValue, 4> Ops;
7562 for (SDValue C : Op->op_values()) {
7563 if (C.isUndef()) {
7564 Ops.push_back(C);
7565 continue;
7566 }
7567 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7568 V.changeSign();
7569 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7570 }
7571 Cost = NegatibleCost::Neutral;
7572 return DAG.getBuildVector(VT, DL, Ops);
7573 }
7574 case ISD::FADD: {
7575 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7576 break;
7577
7578 // After operation legalization, it might not be legal to create new FSUBs.
7579 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7580 break;
7581 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7582
7583 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7584 NegatibleCost CostX = NegatibleCost::Expensive;
7585 SDValue NegX =
7586 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7587 // Prevent this node from being deleted by the next call.
7588 if (NegX)
7589 Handles.emplace_back(NegX);
7590
7591 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7592 NegatibleCost CostY = NegatibleCost::Expensive;
7593 SDValue NegY =
7594 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7595
7596 // We're done with the handles.
7597 Handles.clear();
7598
7599 // Negate X if its cost is less than or equal to that of Y.
7600 if (NegX && (CostX <= CostY)) {
7601 Cost = CostX;
7602 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7603 if (NegY != N)
7604 RemoveDeadNode(NegY);
7605 return N;
7606 }
7607
7608 // Negate the Y if it is not expensive.
7609 if (NegY) {
7610 Cost = CostY;
7611 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7612 if (NegX != N)
7613 RemoveDeadNode(NegX);
7614 return N;
7615 }
7616 break;
7617 }
7618 case ISD::FSUB: {
7619 // We can't turn -(A-B) into B-A when we honor signed zeros.
7620 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7621 break;
7622
7623 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7624 // fold (fneg (fsub 0, Y)) -> Y
7625 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7626 if (C->isZero()) {
7627 Cost = NegatibleCost::Cheaper;
7628 return Y;
7629 }
7630
7631 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7632 Cost = NegatibleCost::Neutral;
7633 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7634 }
7635 case ISD::FMUL:
7636 case ISD::FDIV: {
7637 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7638
7639 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7640 NegatibleCost CostX = NegatibleCost::Expensive;
7641 SDValue NegX =
7642 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7643 // Prevent this node from being deleted by the next call.
7644 if (NegX)
7645 Handles.emplace_back(NegX);
7646
7647 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7648 NegatibleCost CostY = NegatibleCost::Expensive;
7649 SDValue NegY =
7650 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7651
7652 // We're done with the handles.
7653 Handles.clear();
7654
7655 // Negate X if its cost is less than or equal to that of Y.
7656 if (NegX && (CostX <= CostY)) {
7657 Cost = CostX;
7658 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7659 if (NegY != N)
7660 RemoveDeadNode(NegY);
7661 return N;
7662 }
7663
7664 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7665 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7666 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7667 break;
7668
7669 // Negate the Y if it is not expensive.
7670 if (NegY) {
7671 Cost = CostY;
7672 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7673 if (NegX != N)
7674 RemoveDeadNode(NegX);
7675 return N;
7676 }
7677 break;
7678 }
7679 case ISD::FMA:
7680 case ISD::FMAD: {
7681 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7682 break;
7683
7684 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7685 NegatibleCost CostZ = NegatibleCost::Expensive;
7686 SDValue NegZ =
7687 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7688 // Give up if we fail to negate Z.
7689 if (!NegZ)
7690 break;
7691
7692 // Prevent this node from being deleted by the next two calls.
7693 Handles.emplace_back(NegZ);
7694
7695 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7696 NegatibleCost CostX = NegatibleCost::Expensive;
7697 SDValue NegX =
7698 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7699 // Prevent this node from being deleted by the next call.
7700 if (NegX)
7701 Handles.emplace_back(NegX);
7702
7703 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7704 NegatibleCost CostY = NegatibleCost::Expensive;
7705 SDValue NegY =
7706 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7707
7708 // We're done with the handles.
7709 Handles.clear();
7710
7711 // Negate X if its cost is less than or equal to that of Y.
7712 if (NegX && (CostX <= CostY)) {
7713 Cost = std::min(CostX, CostZ);
7714 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7715 if (NegY != N)
7716 RemoveDeadNode(NegY);
7717 return N;
7718 }
7719
7720 // Negate the Y if it is not expensive.
7721 if (NegY) {
7722 Cost = std::min(CostY, CostZ);
7723 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7724 if (NegX != N)
7725 RemoveDeadNode(NegX);
7726 return N;
7727 }
7728 break;
7729 }
7730
7731 case ISD::FP_EXTEND:
7732 case ISD::FSIN:
7733 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7734 OptForSize, Cost, Depth))
7735 return DAG.getNode(Opcode, DL, VT, NegV);
7736 break;
7737 case ISD::FP_ROUND:
7738 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7739 OptForSize, Cost, Depth))
7740 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7741 break;
7742 case ISD::SELECT:
7743 case ISD::VSELECT: {
7744 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7745 // iff at least one cost is cheaper and the other is neutral/cheaper
7746 SDValue LHS = Op.getOperand(1);
7747 NegatibleCost CostLHS = NegatibleCost::Expensive;
7748 SDValue NegLHS =
7749 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7750 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7751 RemoveDeadNode(NegLHS);
7752 break;
7753 }
7754
7755 // Prevent this node from being deleted by the next call.
7756 Handles.emplace_back(NegLHS);
7757
7758 SDValue RHS = Op.getOperand(2);
7759 NegatibleCost CostRHS = NegatibleCost::Expensive;
7760 SDValue NegRHS =
7761 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7762
7763 // We're done with the handles.
7764 Handles.clear();
7765
7766 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7767 (CostLHS != NegatibleCost::Cheaper &&
7768 CostRHS != NegatibleCost::Cheaper)) {
7769 RemoveDeadNode(NegLHS);
7770 RemoveDeadNode(NegRHS);
7771 break;
7772 }
7773
7774 Cost = std::min(CostLHS, CostRHS);
7775 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7776 }
7777 }
7778
7779 return SDValue();
7780}
7781
7782//===----------------------------------------------------------------------===//
7783// Legalization Utilities
7784//===----------------------------------------------------------------------===//
7785
7786bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7787 SDValue LHS, SDValue RHS,
7788 SmallVectorImpl<SDValue> &Result,
7789 EVT HiLoVT, SelectionDAG &DAG,
7790 MulExpansionKind Kind, SDValue LL,
7791 SDValue LH, SDValue RL, SDValue RH) const {
7792 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7793 Opcode == ISD::SMUL_LOHI);
7794
7795 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7796 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7797 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7798 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7799 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7800 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7801 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7802 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7803
7804 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7805 return false;
7806
7807 unsigned OuterBitSize = VT.getScalarSizeInBits();
7808 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7809
7810 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7811 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7812 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7813
7814 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7815 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7816 bool Signed) -> bool {
7817 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7818 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7819 Hi = SDValue(Lo.getNode(), 1);
7820 return true;
7821 }
7822 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7823 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7824 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7825 return true;
7826 }
7827 return false;
7828 };
7829
7830 SDValue Lo, Hi;
7831
7832 if (!LL.getNode() && !RL.getNode() &&
7833 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7834 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7835 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7836 }
7837
7838 if (!LL.getNode())
7839 return false;
7840
7841 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7842 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7843 DAG.MaskedValueIsZero(RHS, HighMask)) {
7844 // The inputs are both zero-extended.
7845 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7846 Result.push_back(Lo);
7847 Result.push_back(Hi);
7848 if (Opcode != ISD::MUL) {
7849 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7850 Result.push_back(Zero);
7851 Result.push_back(Zero);
7852 }
7853 return true;
7854 }
7855 }
7856
7857 if (!VT.isVector() && Opcode == ISD::MUL &&
7858 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7859 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7860 // The input values are both sign-extended.
7861 // TODO non-MUL case?
7862 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7863 Result.push_back(Lo);
7864 Result.push_back(Hi);
7865 return true;
7866 }
7867 }
7868
7869 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7870 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7871
7872 if (!LH.getNode() && !RH.getNode() &&
7873 isOperationLegalOrCustom(ISD::SRL, VT) &&
7874 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7875 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7876 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7877 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7878 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7879 }
7880
7881 if (!LH.getNode())
7882 return false;
7883
7884 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7885 return false;
7886
7887 Result.push_back(Lo);
7888
7889 if (Opcode == ISD::MUL) {
7890 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7891 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7892 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7893 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7894 Result.push_back(Hi);
7895 return true;
7896 }
7897
7898 // Compute the full width result.
7899 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7900 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7901 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7902 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7903 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7904 };
7905
7906 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7907 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7908 return false;
7909
7910 // This is effectively the add part of a multiply-add of half-sized operands,
7911 // so it cannot overflow.
7912 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7913
7914 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7915 return false;
7916
7917 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7918 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7919
7920 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7921 isOperationLegalOrCustom(ISD::ADDE, VT));
7922 if (UseGlue)
7923 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7924 Merge(Lo, Hi));
7925 else
7926 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7927 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7928
7929 SDValue Carry = Next.getValue(1);
7930 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7931 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7932
7933 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7934 return false;
7935
7936 if (UseGlue)
7937 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7938 Carry);
7939 else
7940 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7941 Zero, Carry);
7942
7943 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7944
7945 if (Opcode == ISD::SMUL_LOHI) {
7946 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7947 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7948 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7949
7950 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7951 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7952 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7953 }
7954
7955 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7956 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7957 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7958 return true;
7959}
7960
7961bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7962 SelectionDAG &DAG, MulExpansionKind Kind,
7963 SDValue LL, SDValue LH, SDValue RL,
7964 SDValue RH) const {
7965 SmallVector<SDValue, 2> Result;
7966 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7967 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7968 DAG, Kind, LL, LH, RL, RH);
7969 if (Ok) {
7970 assert(Result.size() == 2);
7971 Lo = Result[0];
7972 Hi = Result[1];
7973 }
7974 return Ok;
7975}
7976
7977// Optimize unsigned division or remainder by constants for types twice as large
7978// as a legal VT.
7979//
7980// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7981// can be computed
7982// as:
7983// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7984// Remainder = Sum % Constant
7985// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7986//
7987// For division, we can compute the remainder using the algorithm described
7988// above, subtract it from the dividend to get an exact multiple of Constant.
7989// Then multiply that exact multiple by the multiplicative inverse modulo
7990// (1 << BitWidth) to get the quotient.
7991
7992// If Constant is even, we can shift right the dividend and the divisor by the
7993// number of trailing zeros in Constant before applying the remainder algorithm.
7994// If we're after the quotient, we can subtract this value from the shifted
7995// dividend and multiply by the multiplicative inverse of the shifted divisor.
7996// If we want the remainder, we shift the value left by the number of trailing
7997// zeros and add the bits that were shifted out of the dividend.
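//
// Worked example (illustrative, not part of the original comment): for a
// 64-bit urem by the constant 3 with 32-bit halves, (1 << 32) % 3 == 1, so
// x % 3 == ((x >> 32) + (x & 0xFFFFFFFF) + carry) % 3.
// E.g. x = 0x200000001: Hi = 2, Lo = 1, Sum = 3, and indeed
// 0x200000001 % 3 == 3 % 3 == 0 (x == 3 * 0xAAAAAAAB).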
7998bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7999 SmallVectorImpl<SDValue> &Result,
8000 EVT HiLoVT, SelectionDAG &DAG,
8001 SDValue LL, SDValue LH) const {
8002 unsigned Opcode = N->getOpcode();
8003 EVT VT = N->getValueType(0);
8004
8005 // TODO: Support signed division/remainder.
8006 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8007 return false;
8008 assert(
8009 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8010 "Unexpected opcode");
8011
8012 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8013 if (!CN)
8014 return false;
8015
8016 APInt Divisor = CN->getAPIntValue();
8017 unsigned BitWidth = Divisor.getBitWidth();
8018 unsigned HBitWidth = BitWidth / 2;
8019 assert(VT.getScalarSizeInBits() == BitWidth &&
8020 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8021
8022 // Divisor needs to be less than (1 << HBitWidth).
8023 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8024 if (Divisor.uge(HalfMaxPlus1))
8025 return false;
8026
8027 // We depend on the UREM-by-constant optimization in DAGCombiner, which
8028 // requires a high multiply.
8029 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8030 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
8031 return false;
8032
8033 // Don't expand if optimizing for size.
8034 if (DAG.shouldOptForSize())
8035 return false;
8036
8037 // Early out for 0 or 1 divisors.
8038 if (Divisor.ule(1))
8039 return false;
8040
8041 // If the divisor is even, shift it until it becomes odd.
8042 unsigned TrailingZeros = 0;
8043 if (!Divisor[0]) {
8044 TrailingZeros = Divisor.countr_zero();
8045 Divisor.lshrInPlace(TrailingZeros);
8046 }
8047
8048 SDLoc dl(N);
8049 SDValue Sum;
8050 SDValue PartialRem;
8051
8052 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8053 // then add in the carry.
8054 // TODO: If we can't split it in half, we might be able to split into 3 or
8055 // more pieces using a smaller bit width.
8056 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8057 assert(!LL == !LH && "Expected both input halves or no input halves!");
8058 if (!LL)
8059 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8060
8061 // Shift the input by the number of TrailingZeros in the divisor. The
8062 // shifted out bits will be added to the remainder later.
8063 if (TrailingZeros) {
8064 // Save the shifted off bits if we need the remainder.
8065 if (Opcode != ISD::UDIV) {
8066 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8067 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8068 DAG.getConstant(Mask, dl, HiLoVT));
8069 }
8070
8071 LL = DAG.getNode(
8072 ISD::OR, dl, HiLoVT,
8073 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8074 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8075 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8076 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8077 HiLoVT, dl)));
8078 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8079 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8080 }
8081
8082 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8083 EVT SetCCType =
8084 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8085 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8086 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8087 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8088 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8089 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8090 } else {
8091 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8092 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8093 // If the boolean for the target is 0 or 1, we can add the setcc result
8094 // directly.
8095 if (getBooleanContents(HiLoVT) ==
8096 TargetLoweringBase::ZeroOrOneBooleanContent)
8097 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8098 else
8099 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8100 DAG.getConstant(0, dl, HiLoVT));
8101 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8102 }
8103 }
8104
8105 // If we didn't find a sum, we can't do the expansion.
8106 if (!Sum)
8107 return false;
8108
8109 // Perform a HiLoVT urem on the Sum using truncated divisor.
8110 SDValue RemL =
8111 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8112 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8113 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8114
8115 if (Opcode != ISD::UREM) {
8116 // Subtract the remainder from the shifted dividend.
8117 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8118 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8119
8120 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8121
8122 // Multiply by the multiplicative inverse of the divisor modulo
8123 // (1 << BitWidth).
8124 APInt MulFactor = Divisor.multiplicativeInverse();
8125
8126 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8127 DAG.getConstant(MulFactor, dl, VT));
8128
8129 // Split the quotient into low and high parts.
8130 SDValue QuotL, QuotH;
8131 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8132 Result.push_back(QuotL);
8133 Result.push_back(QuotH);
8134 }
8135
8136 if (Opcode != ISD::UDIV) {
8137 // If we shifted the input, shift the remainder left and add the bits we
8138 // shifted off the input.
8139 if (TrailingZeros) {
8140 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8141 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8142 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8143 }
8144 Result.push_back(RemL);
8145 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8146 }
8147
8148 return true;
8149}
8150
8151// Check that (every element of) Z is undef or not an exact multiple of BW.
8152static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8153 return ISD::matchUnaryPredicate(
8154 Z,
8155 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8156 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8157}
8158
8159SDValue TargetLowering::expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) const {
8160 EVT VT = Node->getValueType(0);
8161 SDValue ShX, ShY;
8162 SDValue ShAmt, InvShAmt;
8163 SDValue X = Node->getOperand(0);
8164 SDValue Y = Node->getOperand(1);
8165 SDValue Z = Node->getOperand(2);
8166 SDValue Mask = Node->getOperand(3);
8167 SDValue VL = Node->getOperand(4);
8168
8169 unsigned BW = VT.getScalarSizeInBits();
8170 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8171 SDLoc DL(SDValue(Node, 0));
8172
8173 EVT ShVT = Z.getValueType();
8174 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8175 // fshl: X << C | Y >> (BW - C)
8176 // fshr: X << (BW - C) | Y >> C
8177 // where C = Z % BW is not zero
8178 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8179 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8180 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8181 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8182 VL);
8183 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8184 VL);
8185 } else {
8186 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8187 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8188 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8189 if (isPowerOf2_32(BW)) {
8190 // Z % BW -> Z & (BW - 1)
8191 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8192 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8193 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8194 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8195 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8196 } else {
8197 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8198 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8199 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8200 }
8201
8202 SDValue One = DAG.getConstant(1, DL, ShVT);
8203 if (IsFSHL) {
8204 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8205 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8206 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8207 } else {
8208 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8209 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8210 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8211 }
8212 }
8213 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8214}
8215
8216SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8217 SelectionDAG &DAG) const {
8218 if (Node->isVPOpcode())
8219 return expandVPFunnelShift(Node, DAG);
8220
8221 EVT VT = Node->getValueType(0);
8222
8223 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8224 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8225 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8226 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8227 return SDValue();
8228
8229 SDValue X = Node->getOperand(0);
8230 SDValue Y = Node->getOperand(1);
8231 SDValue Z = Node->getOperand(2);
8232
8233 unsigned BW = VT.getScalarSizeInBits();
8234 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8235 SDLoc DL(SDValue(Node, 0));
8236
8237 EVT ShVT = Z.getValueType();
8238
8239 // If a funnel shift in the other direction is more supported, use it.
8240 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8241 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8242 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8243 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8244 // fshl X, Y, Z -> fshr X, Y, -Z
8245 // fshr X, Y, Z -> fshl X, Y, -Z
8246 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8247 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8248 } else {
8249 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8250 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8251 SDValue One = DAG.getConstant(1, DL, ShVT);
8252 if (IsFSHL) {
8253 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8254 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8255 } else {
8256 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8257 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8258 }
8259 Z = DAG.getNOT(DL, Z, ShVT);
8260 }
8261 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8262 }
8263
8264 SDValue ShX, ShY;
8265 SDValue ShAmt, InvShAmt;
8266 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8267 // fshl: X << C | Y >> (BW - C)
8268 // fshr: X << (BW - C) | Y >> C
8269 // where C = Z % BW is not zero
8270 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8271 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8272 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8273 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8274 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8275 } else {
8276 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8277 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8278 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8279 if (isPowerOf2_32(BW)) {
8280 // Z % BW -> Z & (BW - 1)
8281 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8282 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8283 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8284 } else {
8285 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8286 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8287 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8288 }
8289
8290 SDValue One = DAG.getConstant(1, DL, ShVT);
8291 if (IsFSHL) {
8292 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8293 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8294 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8295 } else {
8296 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8297 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8298 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8299 }
8300 }
8301 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8302}
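// Illustrative sketch (not part of TargetLowering.cpp): the power-of-2
// fallback above for a scalar 32-bit fshl, in plain C++ (assuming <cstdint>).
// "Y >> 1 >> InvC" performs a total shift of BW - C without ever shifting by
// BW, which would be undefined:
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned C = Z & 31;     // Z % BW, since BW is a power of 2
  unsigned InvC = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << C) | ((Y >> 1) >> InvC);
}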
8303
8304// TODO: Merge with expandFunnelShift.
8305SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8306 SelectionDAG &DAG) const {
8307 EVT VT = Node->getValueType(0);
8308 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8309 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8310 SDValue Op0 = Node->getOperand(0);
8311 SDValue Op1 = Node->getOperand(1);
8312 SDLoc DL(SDValue(Node, 0));
8313
8314 EVT ShVT = Op1.getValueType();
8315 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8316
8317 // If a rotate in the other direction is more supported, use it.
8318 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8319 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8320 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8321 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8322 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8323 }
8324
8325 if (!AllowVectorOps && VT.isVector() &&
8326 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8327 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8328 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8329 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8330 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8331 return SDValue();
8332
8333 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8334 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8335 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8336 SDValue ShVal;
8337 SDValue HsVal;
8338 if (isPowerOf2_32(EltSizeInBits)) {
8339 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8340 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8341 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8342 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8343 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8344 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8345 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8346 } else {
8347 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8348 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8349 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8350 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8351 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8352 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8353 SDValue One = DAG.getConstant(1, DL, ShVT);
8354 HsVal =
8355 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8356 }
8357 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8358}
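// Illustrative sketch (not part of TargetLowering.cpp): the power-of-2 branch
// above for a scalar 32-bit rotl; "-C & 31" is the (-c & (w - 1)) from the
// comment, and both shift amounts stay strictly below 32:
static uint32_t rotl32(uint32_t V, uint32_t C) {
  return (V << (C & 31)) | (V >> (-C & 31));
}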
8359
8360void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
8361 SelectionDAG &DAG) const {
8362 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8363 EVT VT = Node->getValueType(0);
8364 unsigned VTBits = VT.getScalarSizeInBits();
8365 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8366
8367 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8368 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8369 SDValue ShOpLo = Node->getOperand(0);
8370 SDValue ShOpHi = Node->getOperand(1);
8371 SDValue ShAmt = Node->getOperand(2);
8372 EVT ShAmtVT = ShAmt.getValueType();
8373 EVT ShAmtCCVT =
8374 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8375 SDLoc dl(Node);
8376
8377 // ISD::FSHL and ISD::FSHR have defined overflow behavior, but ISD::SHL and
8378 // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually optimized
8379 // away during isel.
8380 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8381 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8382 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8383 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8384 : DAG.getConstant(0, dl, VT);
8385
8386 SDValue Tmp2, Tmp3;
8387 if (IsSHL) {
8388 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8389 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8390 } else {
8391 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8392 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8393 }
8394
8395 // If the shift amount is greater than or equal to the width of a part, we
8396 // don't use the result from the FSHL/FSHR. Insert a test and select the
8397 // appropriate values for large shift amounts.
8398 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8399 DAG.getConstant(VTBits, dl, ShAmtVT));
8400 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8401 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8402
8403 if (IsSHL) {
8404 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8405 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8406 } else {
8407 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8408 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8409 }
8410}
8411
8412bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8413 SelectionDAG &DAG) const {
8414 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8415 SDValue Src = Node->getOperand(OpNo);
8416 EVT SrcVT = Src.getValueType();
8417 EVT DstVT = Node->getValueType(0);
8418 SDLoc dl(SDValue(Node, 0));
8419
8420 // FIXME: Only f32 to i64 conversions are supported.
8421 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8422 return false;
8423
8424 if (Node->isStrictFPOpcode())
8425 // When a NaN is converted to an integer a trap is allowed. We can't
8426 // use this expansion here because it would eliminate that trap. Other
8427 // traps are also allowed and cannot be eliminated. See
8428 // IEEE 754-2008 sec 5.8.
8429 return false;
8430
8431 // Expand f32 -> i64 conversion
8432 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8433 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8434 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8435 EVT IntVT = SrcVT.changeTypeToInteger();
8436 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8437
8438 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8439 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8440 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8441 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8442 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8443 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8444
8445 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8446
8447 SDValue ExponentBits = DAG.getNode(
8448 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8449 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8450 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8451
8452 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8453 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8454 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8455 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8456
8457 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8458 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8459 DAG.getConstant(0x00800000, dl, IntVT));
8460
8461 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8462
8463 R = DAG.getSelectCC(
8464 dl, Exponent, ExponentLoBit,
8465 DAG.getNode(ISD::SHL, dl, DstVT, R,
8466 DAG.getZExtOrTrunc(
8467 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8468 dl, IntShVT)),
8469 DAG.getNode(ISD::SRL, dl, DstVT, R,
8470 DAG.getZExtOrTrunc(
8471 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8472 dl, IntShVT)),
8473 ISD::SETGT);
8474
8475 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8476 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8477
8478 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8479 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8480 return true;
8481}
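// Illustrative sketch (not part of TargetLowering.cpp): the same
// fixsfdi-style algorithm as scalar C++ (assuming <cstdint> and <cstring>);
// each line mirrors one of the nodes built above. As with fptosi,
// out-of-range inputs are undefined:
static int64_t fixsfdi(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));            // the BITCAST
  int64_t Sign = (Bits & 0x80000000u) ? -1 : 0;    // the SRA by SignLowBit
  int32_t Exponent = int32_t((Bits & 0x7F800000u) >> 23) - 127;
  uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u; // mantissa + implicit bit
  if (Exponent < 0)                                // the final SETLT select
    return 0;
  R = Exponent > 23 ? R << (Exponent - 23)         // the SETGT select
                    : R >> (23 - Exponent);
  return (int64_t(R) ^ Sign) - Sign;               // conditional negation
}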
8482
8483bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8484 SDValue &Chain,
8485 SelectionDAG &DAG) const {
8486 SDLoc dl(SDValue(Node, 0));
8487 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8488 SDValue Src = Node->getOperand(OpNo);
8489
8490 EVT SrcVT = Src.getValueType();
8491 EVT DstVT = Node->getValueType(0);
8492 EVT SetCCVT =
8493 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8494 EVT DstSetCCVT =
8495 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8496
8497 // Only expand vector types if we have the appropriate vector bit operations.
8498 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8499 ISD::FP_TO_SINT;
8500 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8501 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8502 return false;
8503
8504 // If the maximum float value is smaller than the signed integer range, the
8505 // destination signmask can't be represented by the float, so we can just use
8506 // FP_TO_SINT directly.
8507 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8508 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8509 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8510 if (APFloat::opOverflow &
8511 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8512 if (Node->isStrictFPOpcode()) {
8513 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8514 { Node->getOperand(0), Src });
8515 Chain = Result.getValue(1);
8516 } else
8517 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8518 return true;
8519 }
8520
8521 // Don't expand it if there isn't a cheap fsub instruction.
8522 if (!isOperationLegalOrCustom(
8523 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8524 return false;
8525
8526 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8527 SDValue Sel;
8528
8529 if (Node->isStrictFPOpcode()) {
8530 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8531 Node->getOperand(0), /*IsSignaling*/ true);
8532 Chain = Sel.getValue(1);
8533 } else {
8534 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8535 }
8536
8537 bool Strict = Node->isStrictFPOpcode() ||
8538 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8539
8540 if (Strict) {
8541 // Expand based on the maximum range of FP_TO_SINT; if the value exceeds the
8542 // signmask, offset it first (the result of which should be fully representable).
8543 // Sel = Src < 0x8000000000000000
8544 // FltOfs = select Sel, 0, 0x8000000000000000
8545 // IntOfs = select Sel, 0, 0x8000000000000000
8546 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8547
8548 // TODO: Should any fast-math-flags be set for the FSUB?
8549 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8550 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8551 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8552 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8553 DAG.getConstant(0, dl, DstVT),
8554 DAG.getConstant(SignMask, dl, DstVT));
8555 SDValue SInt;
8556 if (Node->isStrictFPOpcode()) {
8557 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8558 { Chain, Src, FltOfs });
8559 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8560 { Val.getValue(1), Val });
8561 Chain = SInt.getValue(1);
8562 } else {
8563 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8564 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8565 }
8566 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8567 } else {
8568 // Expand based on maximum range of FP_TO_SINT:
8569 // True = fp_to_sint(Src)
8570 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8571 // Result = select (Src < 0x8000000000000000), True, False
8572
8573 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8574 // TODO: Should any fast-math-flags be set for the FSUB?
8575 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8576 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8577 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8578 DAG.getConstant(SignMask, dl, DstVT));
8579 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8580 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8581 }
8582 return true;
8583}
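// Illustrative sketch (not part of TargetLowering.cpp): the non-strict
// select-based expansion above, as scalar C++ for f64 -> u64 on a target with
// only signed conversions; the XOR with the signmask is equivalent to adding
// 2^63 back into the offset result:
static uint64_t fptoui64(double Src) {
  const double Cst = 0x1p63; // 2^63, the destination signmask as a double
  if (Src < Cst)             // Sel: the value fits in the signed range
    return (uint64_t)(int64_t)Src;
  return (uint64_t)(int64_t)(Src - Cst) ^ 0x8000000000000000u;
}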
8584
8585bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8586 SDValue &Chain, SelectionDAG &DAG) const {
8587 // This transform is not correct for converting 0 when the rounding mode is
8588 // set to round toward negative infinity, which will produce -0.0. So disable
8589 // under strictfp.
8590 if (Node->isStrictFPOpcode())
8591 return false;
8592
8593 SDValue Src = Node->getOperand(0);
8594 EVT SrcVT = Src.getValueType();
8595 EVT DstVT = Node->getValueType(0);
8596
8597 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8598 // it.
8599 if (Node->getFlags().hasNonNeg() &&
8600 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8601 Result =
8602 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8603 return true;
8604 }
8605
8606 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8607 return false;
8608
8609 // Only expand vector types if we have the appropriate vector bit
8610 // operations.
8611 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8612 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8613 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8614 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8615 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8616 return false;
8617
8618 SDLoc dl(SDValue(Node, 0));
8619
8620 // Implementation of unsigned i64 to f64 following the algorithm in
8621 // __floatundidf in compiler_rt. This implementation performs rounding
8622 // correctly in all rounding modes with the exception of converting 0
8623 // when rounding toward negative infinity. In that case the fsub will
8624 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8625 // incorrect.
8626 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8627 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8628 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8629 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8630 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8631 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8632
8633 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8634 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8635 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8636 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8637 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8638 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8639 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8640 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8641 return true;
8642}
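// Illustrative sketch (not part of TargetLowering.cpp): the magic-number
// expansion above in scalar C++ (mirrors compiler-rt's __floatundidf;
// assuming <cstdint> and <cstring>). 0x433... is 2^52 and 0x453... is 2^84 as
// double bit patterns, so both halves become exact doubles and only the final
// add rounds:
static double floatundidf(uint64_t X) {
  uint64_t LoBits = (X & 0xFFFFFFFFu) | 0x4330000000000000u; // 2^52 + lo
  uint64_t HiBits = (X >> 32) | 0x4530000000000000u;         // 2^84 + hi*2^32
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  return Lo + (Hi - 0x1.00000001p+84); // subtract (2^84 + 2^52), then add
}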
8643
8644SDValue
8645TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8646 SelectionDAG &DAG) const {
8647 unsigned Opcode = Node->getOpcode();
8648 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8649 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8650 "Wrong opcode");
8651
8652 if (Node->getFlags().hasNoNaNs()) {
8653 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8654 EVT VT = Node->getValueType(0);
8655 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8656 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8657 VT.isVector())
8658 return SDValue();
8659 SDValue Op1 = Node->getOperand(0);
8660 SDValue Op2 = Node->getOperand(1);
8661 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8662 Node->getFlags());
8663 }
8664
8665 return SDValue();
8666}
8667
8668SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8669 SelectionDAG &DAG) const {
8670 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8671 return Expanded;
8672
8673 EVT VT = Node->getValueType(0);
8674 if (VT.isScalableVector())
8675 report_fatal_error(
8676 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8677
8678 SDLoc dl(Node);
8679 unsigned NewOp =
8680 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8681
8682 if (isOperationLegalOrCustom(NewOp, VT)) {
8683 SDValue Quiet0 = Node->getOperand(0);
8684 SDValue Quiet1 = Node->getOperand(1);
8685
8686 if (!Node->getFlags().hasNoNaNs()) {
8687 // Insert canonicalizes if it's possible we need to quiet to get correct
8688 // sNaN behavior.
8689 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8690 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8691 Node->getFlags());
8692 }
8693 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8694 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8695 Node->getFlags());
8696 }
8697 }
8698
8699 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8700 }
8701
8702 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8703 // instead if there are no NaNs and there can't be an incompatible zero
8704 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8705 if ((Node->getFlags().hasNoNaNs() ||
8706 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8707 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8708 (Node->getFlags().hasNoSignedZeros() ||
8709 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8710 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8711 unsigned IEEE2018Op =
8712 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8713 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8714 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8715 Node->getOperand(1), Node->getFlags());
8716 }
8717
8718 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8719 return SelCC;
8720
8721 return SDValue();
8722}
8723
8724SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8725 SelectionDAG &DAG) const {
8726 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8727 return Expanded;
8728
8729 SDLoc DL(N);
8730 SDValue LHS = N->getOperand(0);
8731 SDValue RHS = N->getOperand(1);
8732 unsigned Opc = N->getOpcode();
8733 EVT VT = N->getValueType(0);
8734 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8735 bool IsMax = Opc == ISD::FMAXIMUM;
8736 SDNodeFlags Flags = N->getFlags();
8737
8738 // First, implement a comparison that does not propagate NaN. If no native
8739 // fmin or fmax is available, use a plain select with setcc instead.
8740 SDValue MinMax;
8741 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8742 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8743
8744 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8745 // signed zero behavior.
8746 bool MinMaxMustRespectOrderedZero = false;
8747
8748 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8749 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8750 MinMaxMustRespectOrderedZero = true;
8751 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8752 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8753 } else {
8754 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8755 return DAG.UnrollVectorOp(N);
8756
8757 // NaN (if it exists) will be propagated later, so orderedness doesn't matter.
8758 SDValue Compare =
8759 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8760 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8761 }
8762
8763 // Propagate any NaN of both operands
8764 if (!N->getFlags().hasNoNaNs() &&
8765 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8766 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8767 APFloat::getQNaN(VT.getFltSemantics()));
8768 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8769 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8770 }
8771
8772 // fminimum/fmaximum requires -0.0 to be ordered less than +0.0
8773 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8774 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8775 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8776 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8777 SDValue TestZero =
8778 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8779 SDValue LCmp = DAG.getSelect(
8780 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8781 MinMax, Flags);
8782 SDValue RCmp = DAG.getSelect(
8783 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8784 LCmp, Flags);
8785 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8786 }
8787
8788 return MinMax;
8789}
8790
8791SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8792 SelectionDAG &DAG) const {
8793 SDLoc DL(Node);
8794 SDValue LHS = Node->getOperand(0);
8795 SDValue RHS = Node->getOperand(1);
8796 unsigned Opc = Node->getOpcode();
8797 EVT VT = Node->getValueType(0);
8798 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8799 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8800 const TargetOptions &Options = DAG.getTarget().Options;
8801 SDNodeFlags Flags = Node->getFlags();
8802
8803 unsigned NewOp =
8804 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8805
8806 if (isOperationLegalOrCustom(NewOp, VT)) {
8807 if (!Flags.hasNoNaNs()) {
8808 // Insert canonicalizes if it's possible we need to quiet to get correct
8809 // sNaN behavior.
8810 if (!DAG.isKnownNeverSNaN(LHS)) {
8811 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8812 }
8813 if (!DAG.isKnownNeverSNaN(RHS)) {
8814 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8815 }
8816 }
8817
8818 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8819 }
8820
8821 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since they have the
8822 // same behavior for all other cases, +0.0 vs -0.0 included.
8823 if (Flags.hasNoNaNs() ||
8824 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8825 unsigned IEEE2019Op =
8826 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8827 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8828 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8829 }
8830
8831 // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may
8832 // return either one for +0.0 vs -0.0.
8833 if ((Flags.hasNoNaNs() ||
8834 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8835 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8836 DAG.isKnownNeverZeroFloat(RHS))) {
8837 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8838 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8839 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8840 }
8841
8842 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8843 return DAG.UnrollVectorOp(Node);
8844
8845 // If only one operand is NaN, replace it with the other operand.
8846 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8847 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8848 }
8849 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8850 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8851 }
8852
8853 SDValue MinMax =
8854 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8855
8856 // TODO: We need to quiet sNaN if strictfp.
8857
8858 // Fixup signed zero behavior.
8859 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8860 DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
8861 return MinMax;
8862 }
8863 SDValue TestZero =
8864 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8865 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8866 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8867 SDValue LCmp = DAG.getSelect(
8868 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8869 MinMax, Flags);
8870 SDValue RCmp = DAG.getSelect(
8871 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8872 Flags);
8873 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8874}
8875
8876/// Returns a true value if this FPClassTest can be performed with an ordered
8877/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8878/// std::nullopt if it cannot be performed as a compare with 0.
8879static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8880 const fltSemantics &Semantics,
8881 const MachineFunction &MF) {
8882 FPClassTest OrderedMask = Test & ~fcNan;
8883 FPClassTest NanTest = Test & fcNan;
8884 bool IsOrdered = NanTest == fcNone;
8885 bool IsUnordered = NanTest == fcNan;
8886
8887 // Skip cases that are testing for only a qnan or snan.
8888 if (!IsOrdered && !IsUnordered)
8889 return std::nullopt;
8890
8891 if (OrderedMask == fcZero &&
8892 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8893 return IsOrdered;
8894 if (OrderedMask == (fcZero | fcSubnormal) &&
8895 MF.getDenormalMode(Semantics).inputsAreZero())
8896 return IsOrdered;
8897 return std::nullopt;
8898}
8899
8900SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8901 const FPClassTest OrigTestMask,
8902 SDNodeFlags Flags, const SDLoc &DL,
8903 SelectionDAG &DAG) const {
8904 EVT OperandVT = Op.getValueType();
8905 assert(OperandVT.isFloatingPoint());
8906 FPClassTest Test = OrigTestMask;
8907
8908 // Degenerated cases.
8909 if (Test == fcNone)
8910 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8911 if (Test == fcAllFlags)
8912 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8913
8914 // PPC double-double is a pair of doubles, with the higher part determining
8915 // the value class.
8916 if (OperandVT == MVT::ppcf128) {
8917 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8918 DAG.getConstant(1, DL, MVT::i32));
8919 OperandVT = MVT::f64;
8920 }
8921
8922 // Floating-point type properties.
8923 EVT ScalarFloatVT = OperandVT.getScalarType();
8924 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8925 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8926 bool IsF80 = (ScalarFloatVT == MVT::f80);
8927
8928 // Some checks can be implemented using float comparisons, if floating point
8929 // exceptions are ignored.
8930 if (Flags.hasNoFPExcept() &&
8931 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8932 FPClassTest FPTestMask = Test;
8933 bool IsInvertedFP = false;
8934
8935 if (FPClassTest InvertedFPCheck =
8936 invertFPClassTestIfSimpler(FPTestMask, true)) {
8937 FPTestMask = InvertedFPCheck;
8938 IsInvertedFP = true;
8939 }
8940
8941 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8942 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8943
8944 // See if we can fold an | fcNan into an unordered compare.
8945 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8946
8947 // Can't fold the ordered check if we're only testing for snan or qnan
8948 // individually.
8949 if ((FPTestMask & fcNan) != fcNan)
8950 OrderedFPTestMask = FPTestMask;
8951
8952 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8953
8954 if (std::optional<bool> IsCmp0 =
8955 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8956 IsCmp0 && (isCondCodeLegalOrCustom(
8957 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8958 OperandVT.getScalarType().getSimpleVT()))) {
8959
8960 // If denormals could be implicitly treated as 0, this is not equivalent
8961 // to a compare with 0 since it will also be true for denormals.
8962 return DAG.getSetCC(DL, ResultVT, Op,
8963 DAG.getConstantFP(0.0, DL, OperandVT),
8964 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8965 }
8966
8967 if (FPTestMask == fcNan &&
8968 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8969 OperandVT.getScalarType().getSimpleVT()))
8970 return DAG.getSetCC(DL, ResultVT, Op, Op,
8971 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8972
8973 bool IsOrderedInf = FPTestMask == fcInf;
8974 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8975 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8976 : UnorderedCmpOpcode,
8977 OperandVT.getScalarType().getSimpleVT()) &&
8978 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8979 (isOperationLegal(ISD::FABS, OperandVT.getScalarType()) ||
8980 (OperandVT.isVector() &&
8981 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
8982 // isinf(x) --> fabs(x) == inf
8983 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8984 SDValue Inf =
8985 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8986 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8987 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8988 }
8989
8990 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8991 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8992 : UnorderedCmpOpcode,
8993 OperandVT.getSimpleVT())) {
8994 // isposinf(x) --> x == inf
8995 // isneginf(x) --> x == -inf
8996 // isposinf(x) || nan --> x u== inf
8997 // isneginf(x) || nan --> x u== -inf
8998
8999 SDValue Inf = DAG.getConstantFP(
9000 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9001 OperandVT);
9002 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9003 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9004 }
9005
9006 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9007 // TODO: Could handle ordered case, but it produces worse code for
9008 // x86. Maybe handle ordered if fabs is free?
9009
9010 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9011 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9012
9013 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9014 OperandVT.getScalarType().getSimpleVT())) {
9015 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9016
9017 // TODO: Maybe only makes sense if fabs is free. Integer test of
9018 // exponent bits seems better for x86.
9019 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9020 SDValue SmallestNormal = DAG.getConstantFP(
9021 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9022 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9023 IsOrdered ? OrderedOp : UnorderedOp);
9024 }
9025 }
9026
9027 if (FPTestMask == fcNormal) {
9028 // TODO: Handle unordered
9029 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9030 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9031
9032 if (isCondCodeLegalOrCustom(IsFiniteOp,
9033 OperandVT.getScalarType().getSimpleVT()) &&
9034 isCondCodeLegalOrCustom(IsNormalOp,
9035 OperandVT.getScalarType().getSimpleVT()) &&
9036 isFAbsFree(OperandVT)) {
9037 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9038 SDValue Inf =
9039 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9040 SDValue SmallestNormal = DAG.getConstantFP(
9041 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9042
9043 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9044 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9045 SDValue IsNormal =
9046 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9047 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9048 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9049 }
9050 }
9051 }
9052
9053 // Some checks may be represented as the inversion of a simpler check, for
9054 // example "inf|normal|subnormal|zero" => !"nan".
9055 bool IsInverted = false;
9056
9057 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9058 Test = InvertedCheck;
9059 IsInverted = true;
9060 }
9061
9062 // In the general case use integer operations.
9063 unsigned BitSize = OperandVT.getScalarSizeInBits();
9064 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9065 if (OperandVT.isVector())
9066 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9067 OperandVT.getVectorElementCount());
9068 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9069
9070 // Various masks.
9071 APInt SignBit = APInt::getSignMask(BitSize);
9072 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9073 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9074 const unsigned ExplicitIntBitInF80 = 63;
9075 APInt ExpMask = Inf;
9076 if (IsF80)
9077 ExpMask.clearBit(ExplicitIntBitInF80);
9078 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9079 APInt QNaNBitMask =
9080 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9081 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9082
9083 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9084 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9085 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9086 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9087 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9088 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9089
9090 SDValue Res;
9091 const auto appendResult = [&](SDValue PartialRes) {
9092 if (PartialRes) {
9093 if (Res)
9094 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9095 else
9096 Res = PartialRes;
9097 }
9098 };
9099
9100 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9101 const auto getIntBitIsSet = [&]() -> SDValue {
9102 if (!IntBitIsSetV) {
9103 APInt IntBitMask(BitSize, 0);
9104 IntBitMask.setBit(ExplicitIntBitInF80);
9105 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9106 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9107 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9108 }
9109 return IntBitIsSetV;
9110 };
9111
9112 // Split the value into sign bit and absolute value.
9113 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9114 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9115 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9116
9117 // Tests that involve more than one class should be processed first.
9118 SDValue PartialRes;
9119
9120 if (IsF80)
9121 ; // Detect finite numbers of f80 by checking individual classes because
9122 // they have different settings of the explicit integer bit.
9123 else if ((Test & fcFinite) == fcFinite) {
9124 // finite(V) ==> abs(V) < exp_mask
9125 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9126 Test &= ~fcFinite;
9127 } else if ((Test & fcFinite) == fcPosFinite) {
9128 // finite(V) && V > 0 ==> V < exp_mask
9129 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9130 Test &= ~fcPosFinite;
9131 } else if ((Test & fcFinite) == fcNegFinite) {
9132 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9133 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9134 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9135 Test &= ~fcNegFinite;
9136 }
9137 appendResult(PartialRes);
9138
9139 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9140 // fcZero | fcSubnormal => test all exponent bits are 0
9141 // TODO: Handle sign bit specific cases
9142 if (PartialCheck == (fcZero | fcSubnormal)) {
9143 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9144 SDValue ExpIsZero =
9145 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9146 appendResult(ExpIsZero);
9147 Test &= ~PartialCheck & fcAllFlags;
9148 }
9149 }
9150
9151 // Check for individual classes.
9152
9153 if (unsigned PartialCheck = Test & fcZero) {
9154 if (PartialCheck == fcPosZero)
9155 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9156 else if (PartialCheck == fcZero)
9157 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9158 else // ISD::fcNegZero
9159 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9160 appendResult(PartialRes);
9161 }
9162
9163 if (unsigned PartialCheck = Test & fcSubnormal) {
9164 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9165 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9166 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9167 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9168 SDValue VMinusOneV =
9169 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9170 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9171 if (PartialCheck == fcNegSubnormal)
9172 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9173 appendResult(PartialRes);
9174 }
9175
9176 if (unsigned PartialCheck = Test & fcInf) {
9177 if (PartialCheck == fcPosInf)
9178 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9179 else if (PartialCheck == fcInf)
9180 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9181 else { // ISD::fcNegInf
9182 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9183 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9184 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9185 }
9186 appendResult(PartialRes);
9187 }
9188
9189 if (unsigned PartialCheck = Test & fcNan) {
9190 APInt InfWithQnanBit = Inf | QNaNBitMask;
9191 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9192 if (PartialCheck == fcNan) {
9193 // isnan(V) ==> abs(V) > int(inf)
9194 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9195 if (IsF80) {
9196 // Recognize unsupported values as NaNs for compatibility with glibc.
9197 // For such values, (exp(V) == 0) == int_bit.
9198 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9199 SDValue ExpIsZero =
9200 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9201 SDValue IsPseudo =
9202 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9203 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9204 }
9205 } else if (PartialCheck == fcQNan) {
9206 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9207 PartialRes =
9208 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9209 } else { // ISD::fcSNan
9210 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9211 // abs(V) < (unsigned(Inf) | quiet_bit)
9212 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9213 SDValue IsNotQnan =
9214 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9215 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9216 }
9217 appendResult(PartialRes);
9218 }
9219
9220 if (unsigned PartialCheck = Test & fcNormal) {
9221 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9222 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9223 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9224 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9225 APInt ExpLimit = ExpMask - ExpLSB;
9226 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9227 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9228 if (PartialCheck == fcNegNormal)
9229 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9230 else if (PartialCheck == fcPosNormal) {
9231 SDValue PosSignV =
9232 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9233 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9234 }
9235 if (IsF80)
9236 PartialRes =
9237 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9238 appendResult(PartialRes);
9239 }
9240
9241 if (!Res)
9242 return DAG.getConstant(IsInverted, DL, ResultVT);
9243 if (IsInverted)
9244 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9245 return Res;
9246}
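// Illustrative sketch (not part of TargetLowering.cpp): the integer path
// above relies on the IEEE-754 bit layout; e.g. the
// "isnan(V) ==> abs(V) > int(inf)" test for f32 in plain C++ (assuming
// <cstdint> and <cstring>):
static bool isnan32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Abs = Bits & 0x7FFFFFFFu; // AND with ValueMask clears the sign
  return Abs > 0x7F800000u;          // strictly above the +inf bit pattern
}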
9247
9248// Only expand vector types if we have the appropriate vector bit operations.
9249static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9250 assert(VT.isVector() && "Expected vector type");
9251 unsigned Len = VT.getScalarSizeInBits();
9252 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9253 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9254 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9255 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9256 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9257}
9258
9259SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9260 SDLoc dl(Node);
9261 EVT VT = Node->getValueType(0);
9262 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9263 SDValue Op = Node->getOperand(0);
9264 unsigned Len = VT.getScalarSizeInBits();
9265 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9266
9267 // TODO: Add support for irregular type lengths.
9268 if (!(Len <= 128 && Len % 8 == 0))
9269 return SDValue();
9270
9271 // Only expand vector types if we have the appropriate vector bit operations.
9272 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9273 return SDValue();
9274
9275 // This is the "best" algorithm from
9276 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9277 SDValue Mask55 =
9278 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9279 SDValue Mask33 =
9280 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9281 SDValue Mask0F =
9282 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9283
9284 // v = v - ((v >> 1) & 0x55555555...)
9285 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9286 DAG.getNode(ISD::AND, dl, VT,
9287 DAG.getNode(ISD::SRL, dl, VT, Op,
9288 DAG.getConstant(1, dl, ShVT)),
9289 Mask55));
9290 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9291 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9292 DAG.getNode(ISD::AND, dl, VT,
9293 DAG.getNode(ISD::SRL, dl, VT, Op,
9294 DAG.getConstant(2, dl, ShVT)),
9295 Mask33));
9296 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9297 Op = DAG.getNode(ISD::AND, dl, VT,
9298 DAG.getNode(ISD::ADD, dl, VT, Op,
9299 DAG.getNode(ISD::SRL, dl, VT, Op,
9300 DAG.getConstant(4, dl, ShVT))),
9301 Mask0F);
9302
9303 if (Len <= 8)
9304 return Op;
9305
9306 // Avoid the multiply if we only have 2 bytes to add.
9307 // TODO: Only doing this for scalars because vectors weren't as obviously
9308 // improved.
9309 if (Len == 16 && !VT.isVector()) {
9310 // v = (v + (v >> 8)) & 0x00FF;
9311 return DAG.getNode(ISD::AND, dl, VT,
9312 DAG.getNode(ISD::ADD, dl, VT, Op,
9313 DAG.getNode(ISD::SRL, dl, VT, Op,
9314 DAG.getConstant(8, dl, ShVT))),
9315 DAG.getConstant(0xFF, dl, VT));
9316 }
9317
9318 // v = (v * 0x01010101...) >> (Len - 8)
9319 SDValue V;
9320 if (isOperationLegalOrCustomOrPromote(
9321 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9322 SDValue Mask01 =
9323 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9324 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9325 } else {
9326 V = Op;
9327 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9328 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9329 V = DAG.getNode(ISD::ADD, dl, VT, V,
9330 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9331 }
9332 }
9333 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9334}
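// Illustrative sketch (not part of TargetLowering.cpp): the classic parallel
// bit count built above, for 32 bits in plain C++ (assuming <cstdint>); each
// step widens the per-field partial sums, and the multiply adds the bytes:
static unsigned popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit field sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit field sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // 8-bit field sums
  return (V * 0x01010101u) >> 24;                   // sum all four bytes
}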
9335
9336SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9337 SDLoc dl(Node);
9338 EVT VT = Node->getValueType(0);
9339 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9340 SDValue Op = Node->getOperand(0);
9341 SDValue Mask = Node->getOperand(1);
9342 SDValue VL = Node->getOperand(2);
9343 unsigned Len = VT.getScalarSizeInBits();
9344 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9345
9346 // TODO: Add support for irregular type lengths.
9347 if (!(Len <= 128 && Len % 8 == 0))
9348 return SDValue();
9349
9350 // This is the same algorithm as in expandCTPOP, from
9351 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9352 SDValue Mask55 =
9353 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9354 SDValue Mask33 =
9355 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9356 SDValue Mask0F =
9357 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9358
9359 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9360
9361 // v = v - ((v >> 1) & 0x55555555...)
9362 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9363 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9364 DAG.getConstant(1, dl, ShVT), Mask, VL),
9365 Mask55, Mask, VL);
9366 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9367
9368 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9369 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9370 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9371 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9372 DAG.getConstant(2, dl, ShVT), Mask, VL),
9373 Mask33, Mask, VL);
9374 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9375
9376 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9377 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9378 Mask, VL),
9379 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9380 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9381
9382 if (Len <= 8)
9383 return Op;
9384
9385 // v = (v * 0x01010101...) >> (Len - 8)
9386 SDValue V;
9387 if (isOperationLegalOrCustomOrPromote(
9388 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9389 SDValue Mask01 =
9390 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9391 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9392 } else {
9393 V = Op;
9394 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9395 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9396 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9397 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9398 Mask, VL);
9399 }
9400 }
9401 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9402 Mask, VL);
9403}
9404
9405SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9406 SDLoc dl(Node);
9407 EVT VT = Node->getValueType(0);
9408 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9409 SDValue Op = Node->getOperand(0);
9410 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9411
9412 // If the non-ZERO_UNDEF version is supported we can use that instead.
9413 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9414 isOperationLegalOrCustom(ISD::CTLZ, VT))
9415 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9416
9417 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9418 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9419 EVT SetCCVT =
9420 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9421 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9422 SDValue Zero = DAG.getConstant(0, dl, VT);
9423 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9424 return DAG.getSelect(dl, VT, SrcIsZero,
9425 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9426 }
9427
9428 // Only expand vector types if we have the appropriate vector bit operations.
9429 // This includes the operations needed to expand CTPOP if it isn't supported.
9430 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9431 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9432 !canExpandVectorCTPOP(*this, VT)) ||
9433 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9434 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9435 return SDValue();
9436
9437 // for now, we do this:
9438 // x = x | (x >> 1);
9439 // x = x | (x >> 2);
9440 // ...
9441 // x = x | (x >>16);
9442 // x = x | (x >>32); // for 64-bit input
9443 // return popcount(~x);
9444 //
9445 // Ref: "Hacker's Delight" by Henry Warren
9446 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9447 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9448 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9449 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9450 }
9451 Op = DAG.getNOT(dl, Op, VT);
9452 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9453}
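
The or-cascade above smears the leading set bit into every lower position, so counting the zeros of the inverted result yields CTLZ. A scalar sketch (assumes C++20 <bit>):

#include <bit>
#include <cstdint>

// ctlz via bit smearing + popcount of the complement; ctlz32(0) == 32.
uint32_t ctlz32(uint32_t x) {
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  return static_cast<uint32_t>(std::popcount(~x));
}
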
9454
9455SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9456 SDLoc dl(Node);
9457 EVT VT = Node->getValueType(0);
9458 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9459 SDValue Op = Node->getOperand(0);
9460 SDValue Mask = Node->getOperand(1);
9461 SDValue VL = Node->getOperand(2);
9462 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9463
9464 // do this:
9465 // x = x | (x >> 1);
9466 // x = x | (x >> 2);
9467 // ...
9468 // x = x | (x >>16);
9469 // x = x | (x >>32); // for 64-bit input
9470 // return popcount(~x);
9471 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9472 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9473 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9474 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9475 VL);
9476 }
9477 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9478 Mask, VL);
9479 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9480}
9481
9482SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9483 const SDLoc &DL, EVT VT, SDValue Op,
9484 unsigned BitWidth) const {
9485 if (BitWidth != 32 && BitWidth != 64)
9486 return SDValue();
9487 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9488 : APInt(64, 0x0218A392CD3D5DBFULL);
9489 const DataLayout &TD = DAG.getDataLayout();
9490 MachinePointerInfo PtrInfo =
9491 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9492 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9493 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9494 SDValue Lookup = DAG.getNode(
9495 ISD::SRL, DL, VT,
9496 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9497 DAG.getConstant(DeBruijn, DL, VT)),
9498 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9499 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
9500
9501 SmallVector<uint8_t> Table(BitWidth, 0);
9502 for (unsigned i = 0; i < BitWidth; i++) {
9503 APInt Shl = DeBruijn.shl(i);
9504 APInt Lshr = Shl.lshr(ShiftAmt);
9505 Table[Lshr.getZExtValue()] = i;
9506 }
9507
9508 // Create a ConstantArray in Constant Pool
9509 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9510 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9511 TD.getPrefTypeAlign(CA->getType()));
9512 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9513 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9514 PtrInfo, MVT::i8);
9515 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9516 return ExtLoad;
9517
9518 EVT SetCCVT =
9519 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9520 SDValue Zero = DAG.getConstant(0, DL, VT);
9521 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9522 return DAG.getSelect(DL, VT, SrcIsZero,
9523 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9524}
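
The table built above relies on the de Bruijn property: multiplying the isolated lowest set bit by the constant leaves a unique pattern in the top Log2(BitWidth) bits. A self-contained 32-bit sketch using the same 0x077CB531 constant and shift amount (32 - 5 = 27):

#include <cstdint>

// De Bruijn CTTZ: fill the table exactly as the loop above does, then
// look up ((v & -v) * DeBruijn) >> 27. Precondition: v != 0.
uint32_t cttz_debruijn32(uint32_t v) {
  uint8_t Table[32];
  for (unsigned i = 0; i < 32; ++i)
    Table[(0x077CB531u << i) >> 27] = static_cast<uint8_t>(i);
  return Table[((v & (0u - v)) * 0x077CB531u) >> 27];
}
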
9525
9526SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9527 SDLoc dl(Node);
9528 EVT VT = Node->getValueType(0);
9529 SDValue Op = Node->getOperand(0);
9530 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9531
9532 // If the non-ZERO_UNDEF version is supported we can use that instead.
9533 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9534 isOperationLegalOrCustom(ISD::CTTZ, VT))
9535 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9536
9537 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9538 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9539 EVT SetCCVT =
9540 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9541 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9542 SDValue Zero = DAG.getConstant(0, dl, VT);
9543 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9544 return DAG.getSelect(dl, VT, SrcIsZero,
9545 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9546 }
9547
9548 // Only expand vector types if we have the appropriate vector bit operations.
9549 // This includes the operations needed to expand CTPOP if it isn't supported.
9550 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9551 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9552 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9553 !canExpandVectorCTPOP(*this, VT)) ||
9554 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9555 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9556 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9557 return SDValue();
9558
9559 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9560 // to be expanded or converted to a libcall.
9561 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9562 !isOperationLegal(ISD::CTLZ, VT))
9563 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9564 return V;
9565
9566 // for now, we use: { return popcount(~x & (x - 1)); }
9567 // unless the target has ctlz but not ctpop, in which case we use:
9568 // { return 32 - nlz(~x & (x-1)); }
9569 // Ref: "Hacker's Delight" by Henry Warren
9570 SDValue Tmp = DAG.getNode(
9571 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9572 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9573
9574 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9575 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9576 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9577 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9578 }
9579
9580 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9581}
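
The fallback identity works because ~x & (x - 1) keeps exactly the bits below the lowest set bit of x. A scalar sketch (assumes C++20 <bit>):

#include <bit>
#include <cstdint>

// cttz via popcount; for x == 0 the mask is all-ones, so the result is 32.
uint32_t cttz32(uint32_t x) {
  return static_cast<uint32_t>(std::popcount(~x & (x - 1)));
}
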
9582
9583SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9584 SDValue Op = Node->getOperand(0);
9585 SDValue Mask = Node->getOperand(1);
9586 SDValue VL = Node->getOperand(2);
9587 SDLoc dl(Node);
9588 EVT VT = Node->getValueType(0);
9589
9590 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9591 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9592 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9593 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9594 DAG.getConstant(1, dl, VT), Mask, VL);
9595 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9596 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9597}
9598
9599SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9600 SelectionDAG &DAG) const {
9601 // %cond = to_bool_vec %source
9602 // %splat = splat /*val=*/VL
9603 // %tz = step_vector
9604 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9605 // %r = vp.reduce.umin %v
9606 SDLoc DL(N);
9607 SDValue Source = N->getOperand(0);
9608 SDValue Mask = N->getOperand(1);
9609 SDValue EVL = N->getOperand(2);
9610 EVT SrcVT = Source.getValueType();
9611 EVT ResVT = N->getValueType(0);
9612 EVT ResVecVT =
9613 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9614
9615 // Convert to boolean vector.
9616 if (SrcVT.getScalarType() != MVT::i1) {
9617 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9618 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9619 SrcVT.getVectorElementCount());
9620 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9621 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9622 }
9623
9624 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9625 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9626 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9627 SDValue Select =
9628 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9629 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9630}
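
A scalar model of the select/step-vector/umin pattern above: active lanes contribute their index, inactive lanes the splatted EVL, so the minimum is the position of the first active element, or EVL when none is active (an illustrative sketch, not the DAG form):

#include <algorithm>
#include <vector>

unsigned firstActiveLane(const std::vector<bool> &Cond, unsigned EVL) {
  unsigned R = EVL;                      // %splat
  for (unsigned I = 0; I < Cond.size() && I < EVL; ++I)
    R = std::min(R, Cond[I] ? I : EVL);  // vp.select + vp.reduce.umin
  return R;
}
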
9631
9632SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9633 SelectionDAG &DAG) const {
9634 SDLoc DL(N);
9635 SDValue Mask = N->getOperand(0);
9636 EVT MaskVT = Mask.getValueType();
9637 EVT BoolVT = MaskVT.getScalarType();
9638
9639 // Find a suitable type for a stepvector.
9640 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9641 if (MaskVT.isScalableVector())
9642 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9643 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9644 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9645 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9646 /*ZeroIsPoison=*/true, &VScaleRange);
9647 EVT StepVT = MVT::getIntegerVT(EltWidth);
9648 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9649
9650 // If promotion is required to make the type legal, do it here; promotion
9651 // of integers within LegalizeVectorOps is looking for types of the same
9652 // size but with a smaller number of larger elements, not the usual larger
9653 // size with the same number of larger elements.
9654 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9655 TargetLowering::TypePromoteInteger) {
9656 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9657 StepVT = StepVecVT.getVectorElementType();
9658 }
9659
9660 // Zero out lanes with inactive elements, then find the highest remaining
9661 // value from the stepvector.
9662 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9663 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9664 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9665 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9666 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9667}
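
A scalar model of the zero-select/umax reduction above; note that an all-false mask and a mask whose only active lane is lane 0 both produce 0:

#include <algorithm>
#include <vector>

unsigned lastActiveLane(const std::vector<bool> &Mask) {
  unsigned R = 0;
  for (unsigned I = 0; I < Mask.size(); ++I)
    R = std::max(R, Mask[I] ? I : 0u);  // select(Mask, step, 0) + umax
  return R;
}
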
9668
9669SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9670 bool IsNegative) const {
9671 SDLoc dl(N);
9672 EVT VT = N->getValueType(0);
9673 SDValue Op = N->getOperand(0);
9674
9675 // abs(x) -> smax(x,sub(0,x))
9676 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9677 isOperationLegal(ISD::SMAX, VT)) {
9678 SDValue Zero = DAG.getConstant(0, dl, VT);
9679 Op = DAG.getFreeze(Op);
9680 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9681 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9682 }
9683
9684 // abs(x) -> umin(x,sub(0,x))
9685 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9686 isOperationLegal(ISD::UMIN, VT)) {
9687 SDValue Zero = DAG.getConstant(0, dl, VT);
9688 Op = DAG.getFreeze(Op);
9689 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9690 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9691 }
9692
9693 // 0 - abs(x) -> smin(x, sub(0,x))
9694 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9695 isOperationLegal(ISD::SMIN, VT)) {
9696 SDValue Zero = DAG.getConstant(0, dl, VT);
9697 Op = DAG.getFreeze(Op);
9698 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9699 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9700 }
9701
9702 // Only expand vector types if we have the appropriate vector operations.
9703 if (VT.isVector() &&
9704 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9705 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9706 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9707 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9708 return SDValue();
9709
9710 Op = DAG.getFreeze(Op);
9711 SDValue Shift = DAG.getNode(
9712 ISD::SRA, dl, VT, Op,
9713 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9714 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9715
9716 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9717 if (!IsNegative)
9718 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9719
9720 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9721 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9722}
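
In the sra/xor/sub fallback, Y = X >> (bits-1) is 0 for non-negative X and all-ones for negative X, so (X ^ Y) - Y conditionally negates. A scalar sketch done in unsigned arithmetic to stay well-defined at INT32_MIN, which maps to itself just as ISD::ABS does:

#include <cstdint>

int32_t abs32(int32_t x) {
  uint32_t ux = static_cast<uint32_t>(x);
  uint32_t y = static_cast<uint32_t>(x >> 31); // 0 or 0xFFFFFFFF (C++20 sra)
  return static_cast<int32_t>((ux ^ y) - y);   // xor then sub, as above
}
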
9723
9724SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9725 SDLoc dl(N);
9726 EVT VT = N->getValueType(0);
9727 SDValue LHS = N->getOperand(0);
9728 SDValue RHS = N->getOperand(1);
9729 bool IsSigned = N->getOpcode() == ISD::ABDS;
9730
9731 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9732 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9733 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9734 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9735 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9736 LHS = DAG.getFreeze(LHS);
9737 RHS = DAG.getFreeze(RHS);
9738 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9739 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9740 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9741 }
9742
9743 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9744 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9745 LHS = DAG.getFreeze(LHS);
9746 RHS = DAG.getFreeze(RHS);
9747 return DAG.getNode(ISD::OR, dl, VT,
9748 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9749 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9750 }
9751
9752 // If the subtract doesn't overflow then just use abs(sub())
9753 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9754
9755 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9756 return DAG.getNode(ISD::ABS, dl, VT,
9757 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9758
9759 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9760 return DAG.getNode(ISD::ABS, dl, VT,
9761 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9762
9763 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9764 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9765 LHS = DAG.getFreeze(LHS);
9766 RHS = DAG.getFreeze(RHS);
9767 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9768
9769 // Branchless expansion iff cmp result is allbits:
9770 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9771 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9772 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9773 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9774 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9775 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9776 }
9777
9778 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9779 // flag if the (scalar) type is illegal as this is more likely to legalize
9780 // cleanly:
9781 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9782 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9783 SDValue USubO =
9784 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9785 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9786 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9787 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9788 }
9789
9790 // FIXME: Should really try to split the vector in case it's legal on a
9791 // subvector.
9792 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9793 return DAG.UnrollVectorOp(N);
9794
9795 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9796 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9797 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9798 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9799}
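
Two of the unsigned identities above on scalars; usubsat(a,b) is zero whenever a <= b, so exactly one operand of the final or is nonzero (illustrative helpers, not part of this file):

#include <algorithm>
#include <cstdint>

uint32_t abdu_minmax(uint32_t a, uint32_t b) {
  return std::max(a, b) - std::min(a, b);
}

uint32_t abdu_usubsat(uint32_t a, uint32_t b) {
  uint32_t AB = a > b ? a - b : 0; // usubsat(a, b)
  uint32_t BA = b > a ? b - a : 0; // usubsat(b, a)
  return AB | BA;
}
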
9800
9801SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9802 SDLoc dl(N);
9803 EVT VT = N->getValueType(0);
9804 SDValue LHS = N->getOperand(0);
9805 SDValue RHS = N->getOperand(1);
9806
9807 unsigned Opc = N->getOpcode();
9808 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9809 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9810 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9811 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9812 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9813 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9814 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9815 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9816 "Unknown AVG node");
9817
9818 // If the operands are already extended, we can add+shift.
9819 bool IsExt =
9820 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9821 DAG.ComputeNumSignBits(RHS) >= 2) ||
9822 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9823 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9824 if (IsExt) {
9825 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9826 if (!IsFloor)
9827 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9828 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9829 DAG.getShiftAmountConstant(1, VT, dl));
9830 }
9831
9832 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9833 if (VT.isScalarInteger()) {
9834 unsigned BW = VT.getScalarSizeInBits();
9835 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9836 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9837 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9838 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9839 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9840 if (!IsFloor)
9841 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9842 DAG.getConstant(1, dl, ExtVT));
9843 // Just use SRL as we will be truncating away the extended sign bits.
9844 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9845 DAG.getShiftAmountConstant(1, ExtVT, dl));
9846 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9847 }
9848 }
9849
9850 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9851 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9852 SDValue UAddWithOverflow =
9853 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9854
9855 SDValue Sum = UAddWithOverflow.getValue(0);
9856 SDValue Overflow = UAddWithOverflow.getValue(1);
9857
9858 // Right shift the sum by 1
9859 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9860 DAG.getShiftAmountConstant(1, VT, dl));
9861
9862 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9863 SDValue OverflowShl = DAG.getNode(
9864 ISD::SHL, dl, VT, ZeroExtOverflow,
9865 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9866
9867 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9868 }
9869
9870 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9871 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9872 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9873 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9874 LHS = DAG.getFreeze(LHS);
9875 RHS = DAG.getFreeze(RHS);
9876 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9877 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9878 SDValue Shift =
9879 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9880 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9881}
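
The final and/or/xor/shift forms avoid computing the overflowing sum a + b by splitting it into common and differing bits: a + b == 2*(a & b) + (a ^ b) == 2*(a | b) - (a ^ b). On scalars:

#include <cstdint>

uint32_t avgflooru(uint32_t a, uint32_t b) { return (a & b) + ((a ^ b) >> 1); }
uint32_t avgceilu(uint32_t a, uint32_t b) { return (a | b) - ((a ^ b) >> 1); }
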
9882
9883SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9884 SDLoc dl(N);
9885 EVT VT = N->getValueType(0);
9886 SDValue Op = N->getOperand(0);
9887
9888 if (!VT.isSimple())
9889 return SDValue();
9890
9891 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9892 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9893 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9894 default:
9895 return SDValue();
9896 case MVT::i16:
9897 // Use a rotate by 8. This can be further expanded if necessary.
9898 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9899 case MVT::i32:
9900 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9901 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9902 DAG.getConstant(0xFF00, dl, VT));
9903 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9904 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9905 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9906 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9907 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9908 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9909 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9910 case MVT::i64:
9911 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9912 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9913 DAG.getConstant(255ULL<<8, dl, VT));
9914 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9915 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9916 DAG.getConstant(255ULL<<16, dl, VT));
9917 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9918 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9919 DAG.getConstant(255ULL<<24, dl, VT));
9920 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9921 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9922 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9923 DAG.getConstant(255ULL<<24, dl, VT));
9924 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9925 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9926 DAG.getConstant(255ULL<<16, dl, VT));
9927 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9928 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9929 DAG.getConstant(255ULL<<8, dl, VT));
9930 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9931 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9932 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9933 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9934 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9935 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9936 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9937 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9938 }
9939}
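
The MVT::i32 case above, collapsed into plain shifts and masks; each byte travels to its mirrored position and the four lanes are or'ed back together:

#include <cstdint>

uint32_t bswap32(uint32_t x) {
  return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) | (x >> 24);
}
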
9940
9941SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9942 SDLoc dl(N);
9943 EVT VT = N->getValueType(0);
9944 SDValue Op = N->getOperand(0);
9945 SDValue Mask = N->getOperand(1);
9946 SDValue EVL = N->getOperand(2);
9947
9948 if (!VT.isSimple())
9949 return SDValue();
9950
9951 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9952 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9953 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9954 default:
9955 return SDValue();
9956 case MVT::i16:
9957 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9958 Mask, EVL);
9959 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9960 Mask, EVL);
9961 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9962 case MVT::i32:
9963 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9964 Mask, EVL);
9965 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9966 Mask, EVL);
9967 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9968 Mask, EVL);
9969 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9970 Mask, EVL);
9971 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9972 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9973 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9974 Mask, EVL);
9975 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9976 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9977 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9978 case MVT::i64:
9979 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9980 Mask, EVL);
9981 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9982 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9983 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9984 Mask, EVL);
9985 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9986 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9987 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9988 Mask, EVL);
9989 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9990 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9991 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9992 Mask, EVL);
9993 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9994 Mask, EVL);
9995 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9996 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9997 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9998 Mask, EVL);
9999 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10000 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10001 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10002 Mask, EVL);
10003 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10004 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10005 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10006 Mask, EVL);
10007 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10008 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10009 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10010 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10011 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10012 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10013 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10014 }
10015}
10016
10017SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10018 SDLoc dl(N);
10019 EVT VT = N->getValueType(0);
10020 SDValue Op = N->getOperand(0);
10021 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10022 unsigned Sz = VT.getScalarSizeInBits();
10023
10024 SDValue Tmp, Tmp2, Tmp3;
10025
10026 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10027 // and finally the i1 pairs.
10028 // TODO: We can easily support i4/i2 legal types if any target ever does.
10029 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10030 // Create the masks - repeating the pattern every byte.
10031 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10032 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10033 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10034
10035 // BSWAP if the type is wider than a single byte.
10036 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10037
10038 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10039 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10040 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10041 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10042 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10043 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10044
10045 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10046 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10047 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10048 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10049 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10050 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10051
10052 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10053 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10054 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10055 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10056 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10057 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10058 return Tmp;
10059 }
10060
10061 Tmp = DAG.getConstant(0, dl, VT);
10062 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10063 if (I < J)
10064 Tmp2 =
10065 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10066 else
10067 Tmp2 =
10068 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10069
10070 APInt Shift = APInt::getOneBitSet(Sz, J);
10071 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10072 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10073 }
10074
10075 return Tmp;
10076}
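
The power-of-two path above as one scalar function: byte-reverse first, then swap nibbles, bit pairs, and adjacent bits with the same 0x0F/0x33/0x55 masks splatted across the value:

#include <cstdint>

uint32_t bitreverse32(uint32_t v) {
  v = (v << 24) | ((v & 0xFF00u) << 8) | ((v >> 8) & 0xFF00u) | (v >> 24);
  v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
  v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
  v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
  return v;
}
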
10077
10078SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10079 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10080
10081 SDLoc dl(N);
10082 EVT VT = N->getValueType(0);
10083 SDValue Op = N->getOperand(0);
10084 SDValue Mask = N->getOperand(1);
10085 SDValue EVL = N->getOperand(2);
10086 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10087 unsigned Sz = VT.getScalarSizeInBits();
10088
10089 SDValue Tmp, Tmp2, Tmp3;
10090
10091 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10092 // and finally the i1 pairs.
10093 // TODO: We can easily support i4/i2 legal types if any target ever does.
10094 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10095 // Create the masks - repeating the pattern every byte.
10096 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10097 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10098 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10099
10100 // BSWAP if the type is wider than a single byte.
10101 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10102
10103 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10104 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10105 Mask, EVL);
10106 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10107 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10108 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10109 Mask, EVL);
10110 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10111 Mask, EVL);
10112 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10113
10114 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10115 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10116 Mask, EVL);
10117 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10118 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10119 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10120 Mask, EVL);
10121 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10122 Mask, EVL);
10123 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10124
10125 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10126 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10127 Mask, EVL);
10128 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10129 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10130 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10131 Mask, EVL);
10132 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10133 Mask, EVL);
10134 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10135 return Tmp;
10136 }
10137 return SDValue();
10138}
10139
10140std::pair<SDValue, SDValue>
10141TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
10142 SelectionDAG &DAG) const {
10143 SDLoc SL(LD);
10144 SDValue Chain = LD->getChain();
10145 SDValue BasePTR = LD->getBasePtr();
10146 EVT SrcVT = LD->getMemoryVT();
10147 EVT DstVT = LD->getValueType(0);
10148 ISD::LoadExtType ExtType = LD->getExtensionType();
10149
10150 if (SrcVT.isScalableVector())
10151 report_fatal_error("Cannot scalarize scalable vector loads");
10152
10153 unsigned NumElem = SrcVT.getVectorNumElements();
10154
10155 EVT SrcEltVT = SrcVT.getScalarType();
10156 EVT DstEltVT = DstVT.getScalarType();
10157
10158 // A vector must always be stored in memory as-is, i.e. without any padding
10159 // between the elements, since various code depends on it, e.g. in the
10160 // handling of a bitcast of a vector type to int, which may be done with a
10161 // vector store followed by an integer load. A vector that does not have
10162 // elements that are byte-sized must therefore be stored as an integer
10163 // built out of the extracted vector elements.
10164 if (!SrcEltVT.isByteSized()) {
10165 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10166 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10167
10168 unsigned NumSrcBits = SrcVT.getSizeInBits();
10169 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10170
10171 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10172 SDValue SrcEltBitMask = DAG.getConstant(
10173 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10174
10175 // Load the whole vector and avoid masking off the top bits as it makes
10176 // the codegen worse.
10177 SDValue Load =
10178 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10179 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10180 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10181
10182 SmallVector<SDValue, 8> Vals;
10183 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10184 unsigned ShiftIntoIdx =
10185 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10186 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10187 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10188 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10189 SDValue Elt =
10190 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10191 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10192
10193 if (ExtType != ISD::NON_EXTLOAD) {
10194 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10195 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10196 }
10197
10198 Vals.push_back(Scalar);
10199 }
10200
10201 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10202 return std::make_pair(Value, Load.getValue(1));
10203 }
10204
10205 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10206 assert(SrcEltVT.isByteSized());
10207
10208 SmallVector<SDValue, 8> Vals;
10209 SmallVector<SDValue, 8> LoadChains;
10210
10211 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10212 SDValue ScalarLoad = DAG.getExtLoad(
10213 ExtType, SL, DstEltVT, Chain, BasePTR,
10214 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10215 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10216
10217 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10218
10219 Vals.push_back(ScalarLoad.getValue(0));
10220 LoadChains.push_back(ScalarLoad.getValue(1));
10221 }
10222
10223 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10224 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10225
10226 return std::make_pair(Value, NewChain);
10227}
10228
10229SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
10230 SelectionDAG &DAG) const {
10231 SDLoc SL(ST);
10232
10233 SDValue Chain = ST->getChain();
10234 SDValue BasePtr = ST->getBasePtr();
10235 SDValue Value = ST->getValue();
10236 EVT StVT = ST->getMemoryVT();
10237
10238 if (StVT.isScalableVector())
10239 report_fatal_error("Cannot scalarize scalable vector stores");
10240
10241 // The type of the data we want to save
10242 EVT RegVT = Value.getValueType();
10243 EVT RegSclVT = RegVT.getScalarType();
10244
10245 // The type of data as saved in memory.
10246 EVT MemSclVT = StVT.getScalarType();
10247
10248 unsigned NumElem = StVT.getVectorNumElements();
10249
10250 // A vector must always be stored in memory as-is, i.e. without any padding
10251 // between the elements, since various code depends on it, e.g. in the
10252 // handling of a bitcast of a vector type to int, which may be done with a
10253 // vector store followed by an integer load. A vector that does not have
10254 // elements that are byte-sized must therefore be stored as an integer
10255 // built out of the extracted vector elements.
10256 if (!MemSclVT.isByteSized()) {
10257 unsigned NumBits = StVT.getSizeInBits();
10258 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10259
10260 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10261
10262 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10263 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10264 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10265 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10266 unsigned ShiftIntoIdx =
10267 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10268 SDValue ShiftAmount =
10269 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10270 SDValue ShiftedElt =
10271 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10272 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10273 }
10274
10275 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10276 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10277 ST->getAAInfo());
10278 }
10279
10280 // Store Stride in bytes
10281 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10282 assert(Stride && "Zero stride!");
10283 // Extract each of the elements from the original vector and save them into
10284 // memory individually.
10285 SmallVector<SDValue, 8> Stores;
10286 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10287 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10288
10289 SDValue Ptr =
10290 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10291
10292 // This scalar TruncStore may be illegal, but we legalize it later.
10293 SDValue Store = DAG.getTruncStore(
10294 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10295 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10296 ST->getAAInfo());
10297
10298 Stores.push_back(Store);
10299 }
10300
10301 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10302}
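
The sub-byte paths in the two scalarizers above pack and unpack elements through a single integer. A scalar model with four 4-bit elements in a uint16_t, element 0 in the low bits as on a little-endian layout (illustrative helpers only):

#include <cstdint>

uint16_t packNibbles(const uint8_t Elts[4]) {
  uint16_t V = 0;
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    V |= static_cast<uint16_t>(Elts[Idx] & 0xF) << (Idx * 4); // shl + or
  return V;
}

uint8_t extractNibble(uint16_t V, unsigned Idx) {
  return static_cast<uint8_t>((V >> (Idx * 4)) & 0xF);        // srl + and
}
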
10303
10304std::pair<SDValue, SDValue>
10305TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
10306 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10307 "unaligned indexed loads not implemented!");
10308 SDValue Chain = LD->getChain();
10309 SDValue Ptr = LD->getBasePtr();
10310 EVT VT = LD->getValueType(0);
10311 EVT LoadedVT = LD->getMemoryVT();
10312 SDLoc dl(LD);
10313 auto &MF = DAG.getMachineFunction();
10314
10315 if (VT.isFloatingPoint() || VT.isVector()) {
10316 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10317 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10318 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10319 LoadedVT.isVector()) {
10320 // Scalarize the load and let the individual components be handled.
10321 return scalarizeVectorLoad(LD, DAG);
10322 }
10323
10324 // Expand to a (misaligned) integer load of the same size,
10325 // then bitconvert to floating point or vector.
10326 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10327 LD->getMemOperand());
10328 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10329 if (LoadedVT != VT)
10330 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10331 ISD::ANY_EXTEND, dl, VT, Result);
10332
10333 return std::make_pair(Result, newLoad.getValue(1));
10334 }
10335
10336 // Copy the value to a (aligned) stack slot using (unaligned) integer
10337 // loads and stores, then do a (aligned) load from the stack slot.
10338 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10339 unsigned LoadedBytes = LoadedVT.getStoreSize();
10340 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10341 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10342
10343 // Make sure the stack slot is also aligned for the register type.
10344 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10345 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10346 SmallVector<SDValue, 8> Stores;
10347 SDValue StackPtr = StackBase;
10348 unsigned Offset = 0;
10349
10350 EVT PtrVT = Ptr.getValueType();
10351 EVT StackPtrVT = StackPtr.getValueType();
10352
10353 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10354 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10355
10356 // Do all but one copies using the full register width.
10357 for (unsigned i = 1; i < NumRegs; i++) {
10358 // Load one integer register's worth from the original location.
10359 SDValue Load = DAG.getLoad(
10360 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10361 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10362 // Follow the load with a store to the stack slot. Remember the store.
10363 Stores.push_back(DAG.getStore(
10364 Load.getValue(1), dl, Load, StackPtr,
10365 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10366 // Increment the pointers.
10367 Offset += RegBytes;
10368
10369 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10370 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10371 }
10372
10373 // The last copy may be partial. Do an extending load.
10374 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10375 8 * (LoadedBytes - Offset));
10376 SDValue Load = DAG.getExtLoad(
10377 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10378 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10379 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10380 // Follow the load with a store to the stack slot. Remember the store.
10381 // On big-endian machines this requires a truncating store to ensure
10382 // that the bits end up in the right place.
10383 Stores.push_back(DAG.getTruncStore(
10384 Load.getValue(1), dl, Load, StackPtr,
10385 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10386
10387 // The order of the stores doesn't matter - say it with a TokenFactor.
10388 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10389
10390 // Finally, perform the original load only redirected to the stack slot.
10391 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10392 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10393 LoadedVT);
10394
10395 // Callers expect a MERGE_VALUES node.
10396 return std::make_pair(Load, TF);
10397 }
10398
10399 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10400 "Unaligned load of unsupported type.");
10401
10402 // Compute the new VT that is half the size of the old one. This is an
10403 // integer MVT.
10404 unsigned NumBits = LoadedVT.getSizeInBits();
10405 EVT NewLoadedVT;
10406 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10407 NumBits >>= 1;
10408
10409 Align Alignment = LD->getBaseAlign();
10410 unsigned IncrementSize = NumBits / 8;
10411 ISD::LoadExtType HiExtType = LD->getExtensionType();
10412
10413 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10414 if (HiExtType == ISD::NON_EXTLOAD)
10415 HiExtType = ISD::ZEXTLOAD;
10416
10417 // Load the value in two parts
10418 SDValue Lo, Hi;
10419 if (DAG.getDataLayout().isLittleEndian()) {
10420 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10421 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10422 LD->getAAInfo());
10423
10424 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10425 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10426 LD->getPointerInfo().getWithOffset(IncrementSize),
10427 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10428 LD->getAAInfo());
10429 } else {
10430 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10431 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10432 LD->getAAInfo());
10433
10434 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10435 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10436 LD->getPointerInfo().getWithOffset(IncrementSize),
10437 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10438 LD->getAAInfo());
10439 }
10440
10441 // aggregate the two parts
10442 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10443 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10444 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10445
10446 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10447 Hi.getValue(1));
10448
10449 return std::make_pair(Result, TF);
10450}
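
The closing half-width split above turns one misaligned load into two narrower loads joined by shl/or. A byte-level sketch for a 32-bit little-endian load (assumes a little-endian host for the 16-bit memcpys):

#include <cstdint>
#include <cstring>

uint32_t unalignedLoad32LE(const unsigned char *P) {
  uint16_t Lo, Hi;
  std::memcpy(&Lo, P, sizeof(Lo));     // ZEXTLOAD of the low half
  std::memcpy(&Hi, P + 2, sizeof(Hi)); // hi half, IncrementSize bytes later
  return (static_cast<uint32_t>(Hi) << 16) | Lo;
}
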
10451
10452SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
10453 SelectionDAG &DAG) const {
10454 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10455 "unaligned indexed stores not implemented!");
10456 SDValue Chain = ST->getChain();
10457 SDValue Ptr = ST->getBasePtr();
10458 SDValue Val = ST->getValue();
10459 EVT VT = Val.getValueType();
10460 Align Alignment = ST->getBaseAlign();
10461 auto &MF = DAG.getMachineFunction();
10462 EVT StoreMemVT = ST->getMemoryVT();
10463
10464 SDLoc dl(ST);
10465 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10466 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10467 if (isTypeLegal(intVT)) {
10468 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10469 StoreMemVT.isVector()) {
10470 // Scalarize the store and let the individual components be handled.
10471 SDValue Result = scalarizeVectorStore(ST, DAG);
10472 return Result;
10473 }
10474 // Expand to a bitconvert of the value to the integer type of the
10475 // same size, then a (misaligned) int store.
10476 // FIXME: Does not handle truncating floating point stores!
10477 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10478 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10479 Alignment, ST->getMemOperand()->getFlags());
10480 return Result;
10481 }
10482 // Do a (aligned) store to a stack slot, then copy from the stack slot
10483 // to the final destination using (unaligned) integer loads and stores.
10484 MVT RegVT = getRegisterType(
10485 *DAG.getContext(),
10486 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10487 EVT PtrVT = Ptr.getValueType();
10488 unsigned StoredBytes = StoreMemVT.getStoreSize();
10489 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10490 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10491
10492 // Make sure the stack slot is also aligned for the register type.
10493 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10494 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10495
10496 // Perform the original store, only redirected to the stack slot.
10497 SDValue Store = DAG.getTruncStore(
10498 Chain, dl, Val, StackPtr,
10499 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10500
10501 EVT StackPtrVT = StackPtr.getValueType();
10502
10503 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10504 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10505 SmallVector<SDValue, 8> Stores;
10506 unsigned Offset = 0;
10507
10508 // Do all but one copies using the full register width.
10509 for (unsigned i = 1; i < NumRegs; i++) {
10510 // Load one integer register's worth from the stack slot.
10511 SDValue Load = DAG.getLoad(
10512 RegVT, dl, Store, StackPtr,
10513 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10514 // Store it to the final location. Remember the store.
10515 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10516 ST->getPointerInfo().getWithOffset(Offset),
10517 ST->getBaseAlign(),
10518 ST->getMemOperand()->getFlags()));
10519 // Increment the pointers.
10520 Offset += RegBytes;
10521 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10522 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10523 }
10524
10525 // The last store may be partial. Do a truncating store. On big-endian
10526 // machines this requires an extending load from the stack slot to ensure
10527 // that the bits are in the right place.
10528 EVT LoadMemVT =
10529 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10530
10531 // Load from the stack slot.
10532 SDValue Load = DAG.getExtLoad(
10533 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10534 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10535
10536 Stores.push_back(DAG.getTruncStore(
10537 Load.getValue(1), dl, Load, Ptr,
10538 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10539 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10540 // The order of the stores doesn't matter - say it with a TokenFactor.
10541 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10542 return Result;
10543 }
10544
10545 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10546 "Unaligned store of unknown type.");
10547 // Get the half-size VT
10548 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10549 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10550 unsigned IncrementSize = NumBits / 8;
10551
10552 // Divide the stored value in two parts.
10553 SDValue ShiftAmount =
10554 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10555 SDValue Lo = Val;
10556 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10557 // fold and not use the upper bits. A smaller constant may be easier to
10558 // materialize.
10559 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10560 Lo = DAG.getNode(
10561 ISD::AND, dl, VT, Lo,
10562 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10563 VT));
10564 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10565
10566 // Store the two parts
10567 SDValue Store1, Store2;
10568 Store1 = DAG.getTruncStore(Chain, dl,
10569 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10570 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10571 ST->getMemOperand()->getFlags());
10572
10573 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10574 Store2 = DAG.getTruncStore(
10575 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10576 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10577 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10578
10579 SDValue Result =
10580 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10581 return Result;
10582}
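
The store-side counterpart: the value is split with a logical shift right and each half stored separately (same little-endian host assumption as the load sketch above):

#include <cstdint>
#include <cstring>

void unalignedStore32LE(unsigned char *P, uint32_t V) {
  uint16_t Lo = static_cast<uint16_t>(V);       // low NumBits
  uint16_t Hi = static_cast<uint16_t>(V >> 16); // Val >> ShiftAmount
  std::memcpy(P, &Lo, sizeof(Lo));
  std::memcpy(P + 2, &Hi, sizeof(Hi));
}
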
10583
10584SDValue
10585TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10586 const SDLoc &DL, EVT DataVT,
10587 SelectionDAG &DAG,
10588 bool IsCompressedMemory) const {
10589 SDValue Increment;
10590 EVT AddrVT = Addr.getValueType();
10591 EVT MaskVT = Mask.getValueType();
10592 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10593 "Incompatible types of Data and Mask");
10594 if (IsCompressedMemory) {
10595 if (DataVT.isScalableVector())
10596 report_fatal_error(
10597 "Cannot currently handle compressed memory with scalable vectors");
10598 // Increment the pointer according to the number of '1's in the mask.
10599 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10600 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10601 if (MaskIntVT.getSizeInBits() < 32) {
10602 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10603 MaskIntVT = MVT::i32;
10604 }
10605
10606 // Count '1's with POPCNT.
10607 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10608 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10609 // Scale is an element size in bytes.
10610 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10611 AddrVT);
10612 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10613 } else if (DataVT.isScalableVector()) {
10614 Increment = DAG.getVScale(DL, AddrVT,
10615 APInt(AddrVT.getFixedSizeInBits(),
10616 DataVT.getStoreSize().getKnownMinValue()));
10617 } else
10618 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10619
10620 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10621}
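
For the compressed-memory case the pointer advances by one element per set mask bit, i.e. popcount(mask) times the element size in bytes. A scalar model (assumes C++20 <bit>):

#include <bit>
#include <cstdint>

uint64_t incrementCompressedAddr(uint64_t Addr, uint32_t MaskBits,
                                 unsigned EltSizeInBytes) {
  return Addr + uint64_t(std::popcount(MaskBits)) * EltSizeInBytes;
}
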
10622
10623static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10624 EVT VecVT, const SDLoc &dl,
10625 ElementCount SubEC) {
10626 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10627 "Cannot index a scalable vector within a fixed-width vector");
10628
10629 unsigned NElts = VecVT.getVectorMinNumElements();
10630 unsigned NumSubElts = SubEC.getKnownMinValue();
10631 EVT IdxVT = Idx.getValueType();
10632
10633 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10634 // If this is a constant index and we know the value plus the number of the
10635 // elements in the subvector minus one is less than the minimum number of
10636 // elements then it's safe to return Idx.
10637 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10638 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10639 return Idx;
10640 SDValue VS =
10641 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10642 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10643 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10644 DAG.getConstant(NumSubElts, dl, IdxVT));
10645 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10646 }
10647 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10648 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10649 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10650 DAG.getConstant(Imm, dl, IdxVT));
10651 }
10652 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10653 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10654 DAG.getConstant(MaxIndex, dl, IdxVT));
10655}
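
The fixed-width rules above on scalars: a single-element index into a power-of-two vector can simply be masked, anything else is umin'ed against the last in-bounds subvector start:

#include <algorithm>
#include <cstdint>

uint64_t clampIndex(uint64_t Idx, unsigned NElts, unsigned NumSubElts) {
  if (NumSubElts == 1 && (NElts & (NElts - 1)) == 0)
    return Idx & (NElts - 1);                               // and-mask path
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return std::min<uint64_t>(Idx, MaxIndex);                 // umin path
}
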
10656
10657SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
10658 SDValue VecPtr, EVT VecVT,
10659 SDValue Index) const {
10660 return getVectorSubVecPointer(
10661 DAG, VecPtr, VecVT,
10662 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10663 Index);
10664}
10665
10666SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
10667 SDValue VecPtr, EVT VecVT,
10668 EVT SubVecVT,
10669 SDValue Index) const {
10670 SDLoc dl(Index);
10671 // Make sure the index type is big enough to compute in.
10672 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10673
10674 EVT EltVT = VecVT.getVectorElementType();
10675
10676 // Calculate the element offset and add it to the pointer.
10677 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10678 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10679 "Converting bits to bytes lost precision");
10680 assert(SubVecVT.getVectorElementType() == EltVT &&
10681 "Sub-vector must be a vector with matching element type");
10682 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10683 SubVecVT.getVectorElementCount());
10684
10685 EVT IdxVT = Index.getValueType();
10686 if (SubVecVT.isScalableVector())
10687 Index =
10688 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10689 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10690
10691 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10692 DAG.getConstant(EltSize, dl, IdxVT));
10693 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10694}
10695
10696//===----------------------------------------------------------------------===//
10697// Implementation of Emulated TLS Model
10698//===----------------------------------------------------------------------===//
10699
10700SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10701 SelectionDAG &DAG) const {
10702 // Access to the address of TLS variable xyz is lowered to a function call:
10703 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10704 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10705 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10706 SDLoc dl(GA);
10707
10708 ArgListTy Args;
10709 const GlobalValue *GV =
10710 cast<GlobalAddressSDNode>(GA)->getGlobal();
10711 SmallString<32> NameString("__emutls_v.");
10712 NameString += GV->getName();
10713 StringRef EmuTlsVarName(NameString);
10714 const GlobalVariable *EmuTlsVar =
10715 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10716 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10717 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10718
10719 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10720
10721 TargetLowering::CallLoweringInfo CLI(DAG);
10722 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10723 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10724 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10725
10726 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10727 // At least for X86 targets, maybe good for other targets too?
10728 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10729 MFI.setAdjustsStack(true); // Is this only for X86 target?
10730 MFI.setHasCalls(true);
10731
10732 assert((GA->getOffset() == 0) &&
10733 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10734 return CallResult.first;
10735}
10736
10737SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10738 SelectionDAG &DAG) const {
10739 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10740 if (!isCtlzFast())
10741 return SDValue();
10742 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10743 SDLoc dl(Op);
10744 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10745 EVT VT = Op.getOperand(0).getValueType();
10746 SDValue Zext = Op.getOperand(0);
10747 if (VT.bitsLT(MVT::i32)) {
10748 VT = MVT::i32;
10749 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10750 }
10751 unsigned Log2b = Log2_32(VT.getSizeInBits());
10752 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10753 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10754 DAG.getConstant(Log2b, dl, MVT::i32));
10755 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10756 }
10757 return SDValue();
10758}
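
The transform works because only x == 0 has all BitWidth leading zeros, so ctlz(x) >> log2(BitWidth) is precisely the zero-extended i1 result of (x == 0). For 32 bits (assumes C++20 <bit>):

#include <bit>
#include <cstdint>

uint32_t isZero32(uint32_t x) {
  return static_cast<uint32_t>(std::countl_zero(x)) >> 5; // 32 >> 5 == 1
}
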
10759
10760SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10761 SDValue Op0 = Node->getOperand(0);
10762 SDValue Op1 = Node->getOperand(1);
10763 EVT VT = Op0.getValueType();
10764 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10765 unsigned Opcode = Node->getOpcode();
10766 SDLoc DL(Node);
10767
10768 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10769 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10770 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10771 Op0 = DAG.getFreeze(Op0);
10772 SDValue Zero = DAG.getConstant(0, DL, VT);
10773 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10774 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10775 }
10776
10777 // umin(x,y) -> sub(x,usubsat(x,y))
10778 // TODO: Missing freeze(Op0)?
10779 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10780 isOperationLegal(ISD::USUBSAT, VT)) {
10781 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10782 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10783 }
10784
10785 // umax(x,y) -> add(x,usubsat(y,x))
10786 // TODO: Missing freeze(Op0)?
10787 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10788 isOperationLegal(ISD::USUBSAT, VT)) {
10789 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10790 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10791 }
10792
10793 // FIXME: Should really try to split the vector in case it's legal on a
10794 // subvector.
10795 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10796 return DAG.UnrollVectorOp(Node);
10797
10798 // Attempt to find an existing SETCC node that we can reuse.
10799 // TODO: Do we need a generic doesSETCCNodeExist?
10800 // TODO: Missing freeze(Op0)/freeze(Op1)?
10801 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10802 ISD::CondCode PrefCommuteCC,
10803 ISD::CondCode AltCommuteCC) {
10804 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10805 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10806 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10807 {Op0, Op1, DAG.getCondCode(CC)})) {
10808 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10809 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10810 }
10811 }
10812 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10813 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10814 {Op0, Op1, DAG.getCondCode(CC)})) {
10815 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10816 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10817 }
10818 }
10819 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10820 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10821 };
10822
10823 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10824 // -> Y = (A < B) ? B : A
10825 // -> Y = (A >= B) ? A : B
10826 // -> Y = (A <= B) ? B : A
10827 switch (Opcode) {
10828 case ISD::SMAX:
10829 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10830 case ISD::SMIN:
10831 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10832 case ISD::UMAX:
10833 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10834 case ISD::UMIN:
10835 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10836 }
10837
10838 llvm_unreachable("How did we get here?");
10839}
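
The usubsat identities above, on a narrow scalar: usubsat(x,y) is max(x - y, 0), so x - usubsat(x,y) folds to min(x,y) and x + usubsat(y,x) to max(x,y), neither of which can wrap:

#include <cstdint>

uint8_t usubsat8(uint8_t x, uint8_t y) { return x > y ? uint8_t(x - y) : uint8_t(0); }
uint8_t umin8(uint8_t x, uint8_t y) { return uint8_t(x - usubsat8(x, y)); }
uint8_t umax8(uint8_t x, uint8_t y) { return uint8_t(x + usubsat8(y, x)); }
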
10840
10841SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
10842 unsigned Opcode = Node->getOpcode();
10843 SDValue LHS = Node->getOperand(0);
10844 SDValue RHS = Node->getOperand(1);
10845 EVT VT = LHS.getValueType();
10846 SDLoc dl(Node);
10847
10848 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10849 assert(VT.isInteger() && "Expected operands to be integers");
10850
10851 // usub.sat(a, b) -> umax(a, b) - b
10852 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10853 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10854 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10855 }
10856
10857 // uadd.sat(a, b) -> umin(a, ~b) + b
10858 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10859 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10860 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10861 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10862 }
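// A worked example of the identity above (assuming i8 operands, values chosen
// for illustration): uadd.sat(200, 100) becomes umin(200, ~100) + 100 =
// umin(200, 155) + 100 = 155 + 100 = 255, the saturated result, while
// uadd.sat(100, 100) becomes umin(100, 155) + 100 = 200, the exact sum.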
10863
10864 unsigned OverflowOp;
10865 switch (Opcode) {
10866 case ISD::SADDSAT:
10867 OverflowOp = ISD::SADDO;
10868 break;
10869 case ISD::UADDSAT:
10870 OverflowOp = ISD::UADDO;
10871 break;
10872 case ISD::SSUBSAT:
10873 OverflowOp = ISD::SSUBO;
10874 break;
10875 case ISD::USUBSAT:
10876 OverflowOp = ISD::USUBO;
10877 break;
10878 default:
10879 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10880 "addition or subtraction node.");
10881 }
10882
10883 // FIXME: Should really try to split the vector in case it's legal on a
10884 // subvector.
10885 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10886 return DAG.UnrollVectorOp(Node);
10887
10888 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10889 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10890 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10891 SDValue SumDiff = Result.getValue(0);
10892 SDValue Overflow = Result.getValue(1);
10893 SDValue Zero = DAG.getConstant(0, dl, VT);
10894 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10895
10896 if (Opcode == ISD::UADDSAT) {
10897 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10898 // (LHS + RHS) | OverflowMask
10899 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10900 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10901 }
10902 // Overflow ? 0xffff.... : (LHS + RHS)
10903 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10904 }
10905
10906 if (Opcode == ISD::USUBSAT) {
10907 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10908 // (LHS - RHS) & ~OverflowMask
10909 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10910 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10911 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10912 }
10913 // Overflow ? 0 : (LHS - RHS)
10914 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10915 }
10916
10917 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10918 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10919 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10920
10921 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10922 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10923
10924 // If either of the operand signs are known, then they are guaranteed to
10925 // only saturate in one direction. If non-negative they will saturate
10926 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10927 //
10928 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10929 // sign of 'y' has to be flipped.
10930
10931 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10932 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10933 : KnownRHS.isNegative();
10934 if (LHSIsNonNegative || RHSIsNonNegative) {
10935 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10936 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10937 }
10938
10939 bool LHSIsNegative = KnownLHS.isNegative();
10940 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10941 : KnownRHS.isNonNegative();
10942 if (LHSIsNegative || RHSIsNegative) {
10943 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10944 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10945 }
10946 }
10947
10948 // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
10949 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10950 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10951 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10952 DAG.getConstant(BitWidth - 1, dl, VT));
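// A sketch of why this works (assuming i8, values chosen for illustration):
// on signed overflow SumDiff has the wrong sign, so SumDiff >> 7 is all-ones
// when the true result was positive and all-zeros when it was negative, and
// XOR with 0x80 then yields 0x7f (SatMax) or 0x80 (SatMin) respectively.
// E.g. 100 + 50 wraps to -106, and 0xff ^ 0x80 == 0x7f == 127.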
10953 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10954 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10955}
10956
10957SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
10958 unsigned Opcode = Node->getOpcode();
10959 SDValue LHS = Node->getOperand(0);
10960 SDValue RHS = Node->getOperand(1);
10961 EVT VT = LHS.getValueType();
10962 EVT ResVT = Node->getValueType(0);
10963 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10964 SDLoc dl(Node);
10965
10966 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10967 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10968 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10969 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10970
10971 // We can't perform arithmetic on i1 values. Extending them would
10972 // probably result in worse codegen, so let's just use two selects instead.
10973 // Some targets are also just better off using selects rather than subtraction
10974 // because one of the conditions can be merged with one of the selects.
10975 // And finally, if we don't know the contents of high bits of a boolean value
10976 // we can't perform any arithmetic either.
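// In select form the result is simply (IsLT ? -1 : (IsGT ? 1 : 0)), matching
// the -1/0/1 contract of [su]cmp. E.g. for scmp(3, 7), IsLT is true and the
// outer select produces all-ones (-1) regardless of the inner select.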
10977 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10978 getBooleanContents(BoolVT) == UndefinedBooleanContent) {
10979 SDValue SelectZeroOrOne =
10980 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10981 DAG.getConstant(0, dl, ResVT));
10982 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10983 SelectZeroOrOne);
10984 }
10985
10985
10986 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
10987 std::swap(IsGT, IsLT);
10988 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10989 ResVT);
10990}
10991
10992SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10993 unsigned Opcode = Node->getOpcode();
10994 bool IsSigned = Opcode == ISD::SSHLSAT;
10995 SDValue LHS = Node->getOperand(0);
10996 SDValue RHS = Node->getOperand(1);
10997 EVT VT = LHS.getValueType();
10998 SDLoc dl(Node);
10999
11000 assert((Node->getOpcode() == ISD::SSHLSAT ||
11001 Node->getOpcode() == ISD::USHLSAT) &&
11002 "Expected a SHLSAT opcode");
11003 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11004 assert(VT.isInteger() && "Expected operands to be integers");
11005
11006 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11007 return DAG.UnrollVectorOp(Node);
11008
11009 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
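// A worked example (assuming an i8 ushl.sat, values chosen for illustration):
// 0x40 << 2 wraps to 0x00 and 0x00 >> 2 == 0x00 != 0x40, so the shift
// overflowed and the result saturates to 0xff; by contrast 0x10 << 2 == 0x40
// round-trips back to 0x10, so 0x40 is returned unchanged.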
11010
11011 unsigned BW = VT.getScalarSizeInBits();
11012 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11013 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11014 SDValue Orig =
11015 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11016
11017 SDValue SatVal;
11018 if (IsSigned) {
11019 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11020 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11021 SDValue Cond =
11022 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11023 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11024 } else {
11025 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11026 }
11027 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11028 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11029}
11030
11031void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11032 bool Signed, SDValue &Lo, SDValue &Hi,
11033 SDValue LHS, SDValue RHS,
11034 SDValue HiLHS, SDValue HiRHS) const {
11035 EVT VT = LHS.getValueType();
11036 assert(RHS.getValueType() == VT && "Mismatching operand types");
11037
11038 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11039 assert((!Signed || !HiLHS) &&
11040 "Signed flag should only be set when HiLHS and RiRHS are null");
11041
11042 // We'll expand the multiplication by brute force because we have no other
11043 // options. This is a trivially-generalized version of the code from
11044 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11045 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11046 // sign bits while calculating the Hi half.
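// The decomposition computed below, with h = Bits / 2, LHS = LH * 2^h + LL,
// and RHS = RH * 2^h + RL, is:
// LHS * RHS = (LH * RH) * 2^(2h) + (LH * RL + LL * RH) * 2^h + LL * RL,
// where T, U and V accumulate the partial products together with their
// carries so that Lo receives the low half of the product and Hi the high.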
11047 unsigned Bits = VT.getSizeInBits();
11048 unsigned HalfBits = Bits / 2;
11049 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11050 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11051 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11052
11053 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11054 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11055
11056 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11057 // This is always an unsigned shift.
11058 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11059
11060 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11061 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11062 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11063
11064 SDValue U =
11065 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11066 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11067 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11068
11069 SDValue V =
11070 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11071 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11072
11073 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11074 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11075
11076 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11077 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11078
11079 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11080 // the products to Hi.
11081 if (HiLHS) {
11082 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11083 DAG.getNode(ISD::ADD, dl, VT,
11084 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11085 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11086 }
11087}
11088
11089void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11090 bool Signed, const SDValue LHS,
11091 const SDValue RHS, SDValue &Lo,
11092 SDValue &Hi) const {
11093 EVT VT = LHS.getValueType();
11094 assert(RHS.getValueType() == VT && "Mismatching operand types");
11095 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11096 // We can fall back to a libcall with an illegal type for the MUL if we
11097 // have a libcall big enough.
11098 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11099 if (WideVT == MVT::i16)
11100 LC = RTLIB::MUL_I16;
11101 else if (WideVT == MVT::i32)
11102 LC = RTLIB::MUL_I32;
11103 else if (WideVT == MVT::i64)
11104 LC = RTLIB::MUL_I64;
11105 else if (WideVT == MVT::i128)
11106 LC = RTLIB::MUL_I128;
11107
11108 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
11109 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11110 return;
11111 }
11112
11113 SDValue HiLHS, HiRHS;
11114 if (Signed) {
11115 // The high part is obtained by SRA'ing all but one of the bits of low
11116 // part.
11117 unsigned LoSize = VT.getFixedSizeInBits();
11118 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11119 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11120 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11121 } else {
11122 HiLHS = DAG.getConstant(0, dl, VT);
11123 HiRHS = DAG.getConstant(0, dl, VT);
11124 }
11125
11126 // Attempt a libcall.
11127 SDValue Ret;
11128 TargetLowering::MakeLibCallOptions CallOptions;
11129 CallOptions.setIsSigned(Signed);
11130 CallOptions.setIsPostTypeLegalization(true);
11131 if (DAG.getDataLayout().isLittleEndian()) {
11132 // Halves of WideVT are packed into registers in different order
11133 // depending on platform endianness. This is usually handled by
11134 // the C calling convention, but we can't defer to it in
11135 // the legalizer.
11136 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11137 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11138 } else {
11139 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11140 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11141 }
11142 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11143 "Ret value is a collection of constituent nodes holding result.");
11144 if (DAG.getDataLayout().isLittleEndian()) {
11145 // Same as above.
11146 Lo = Ret.getOperand(0);
11147 Hi = Ret.getOperand(1);
11148 } else {
11149 Lo = Ret.getOperand(1);
11150 Hi = Ret.getOperand(0);
11151 }
11152}
11153
11154SDValue
11155TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11156 assert((Node->getOpcode() == ISD::SMULFIX ||
11157 Node->getOpcode() == ISD::UMULFIX ||
11158 Node->getOpcode() == ISD::SMULFIXSAT ||
11159 Node->getOpcode() == ISD::UMULFIXSAT) &&
11160 "Expected a fixed point multiplication opcode");
11161
11162 SDLoc dl(Node);
11163 SDValue LHS = Node->getOperand(0);
11164 SDValue RHS = Node->getOperand(1);
11165 EVT VT = LHS.getValueType();
11166 unsigned Scale = Node->getConstantOperandVal(2);
11167 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11168 Node->getOpcode() == ISD::UMULFIXSAT);
11169 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11170 Node->getOpcode() == ISD::SMULFIXSAT);
11171 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11172 unsigned VTSize = VT.getScalarSizeInBits();
11173
11174 if (!Scale) {
11175 // [us]mul.fix(a, b, 0) -> mul(a, b)
11176 if (!Saturating) {
11177 if (isOperationLegalOrCustom(ISD::MUL, VT))
11178 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11179 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11180 SDValue Result =
11181 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11182 SDValue Product = Result.getValue(0);
11183 SDValue Overflow = Result.getValue(1);
11184 SDValue Zero = DAG.getConstant(0, dl, VT);
11185
11186 APInt MinVal = APInt::getSignedMinValue(VTSize);
11187 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11188 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11189 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11190 // Xor the inputs, if resulting sign bit is 0 the product will be
11191 // positive, else negative.
11192 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11193 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11194 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11195 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11196 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11197 SDValue Result =
11198 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11199 SDValue Product = Result.getValue(0);
11200 SDValue Overflow = Result.getValue(1);
11201
11202 APInt MaxVal = APInt::getMaxValue(VTSize);
11203 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11204 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11205 }
11206 }
11207
11208 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11209 "Expected scale to be less than the number of bits if signed or at "
11210 "most the number of bits if unsigned.");
11211 assert(LHS.getValueType() == RHS.getValueType() &&
11212 "Expected both operands to be the same type");
11213
11214 // Get the upper and lower bits of the result.
11215 SDValue Lo, Hi;
11216 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11217 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11218 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11219 if (VT.isVector())
11220 WideVT =
11221 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11222 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11223 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11224 Lo = Result.getValue(0);
11225 Hi = Result.getValue(1);
11226 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11227 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11228 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11229 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11230 // Try for a multiplication using a wider type.
11231 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11232 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11233 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11234 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11235 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11236 SDValue Shifted =
11237 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11238 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11239 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11240 } else if (VT.isVector()) {
11241 return SDValue();
11242 } else {
11243 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11244 }
11245
11246 if (Scale == VTSize)
11247 // Result is just the top half since we'd be shifting by the width of the
11248 // operand. Overflow impossible so this works for both UMULFIX and
11249 // UMULFIXSAT.
11250 return Hi;
11251
11252 // The result will need to be shifted right by the scale since both operands
11253 // are scaled. The result is given to us in 2 halves, so we only want part of
11254 // both in the result.
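// A worked example (assuming i8 with Scale == 4, values chosen for
// illustration): 1.5 * 2.5 is 0x18 * 0x28 = 960 = 0x03C0, so Hi:Lo =
// 0x03:0xC0 and fshr(Hi, Lo, 4) extracts 0x3C == 60, i.e. 60/16 == 3.75, the
// correctly scaled product.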
11255 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11256 DAG.getShiftAmountConstant(Scale, VT, dl));
11257 if (!Saturating)
11258 return Result;
11259
11260 if (!Signed) {
11261 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11262 // widened multiplication) aren't all zeroes.
11263
11264 // Saturate to max if ((Hi >> Scale) != 0),
11265 // which is the same as if (Hi > ((1 << Scale) - 1))
11266 APInt MaxVal = APInt::getMaxValue(VTSize);
11267 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11268 dl, VT);
11269 Result = DAG.getSelectCC(dl, Hi, LowMask,
11270 DAG.getConstant(MaxVal, dl, VT), Result,
11271 ISD::SETUGT);
11272
11273 return Result;
11274 }
11275
11276 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11277 // widened multiplication) aren't all ones or all zeroes.
11278
11279 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11280 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11281
11282 if (Scale == 0) {
11283 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11284 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11285 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11286 // Saturated to SatMin if wide product is negative, and SatMax if wide
11287 // product is positive ...
11288 SDValue Zero = DAG.getConstant(0, dl, VT);
11289 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11290 ISD::SETLT);
11291 // ... but only if we overflowed.
11292 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11293 }
11294
11295 // We handled Scale==0 above so all the bits to examine are in Hi.
11296
11297 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11298 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11299 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11300 dl, VT);
11301 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11302 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11303 // which is the same as if (Hi < (-1 << (Scale - 1))).
11304 SDValue HighMask =
11305 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11306 dl, VT);
11307 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11308 return Result;
11309}
11310
11311SDValue
11312TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11313 SDValue LHS, SDValue RHS,
11314 unsigned Scale, SelectionDAG &DAG) const {
11315 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11316 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11317 "Expected a fixed point division opcode");
11318
11319 EVT VT = LHS.getValueType();
11320 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11321 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11322 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11323
11324 // If there is enough room in the type to upscale the LHS or downscale the
11325 // RHS before the division, we can perform it in this type without having to
11326 // resize. For signed operations, the LHS headroom is the number of
11327 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11328 // The headroom for the RHS is the number of trailing zeroes.
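// The quotient being computed is (LHS << Scale) / RHS. As a worked example
// (assuming an i16 udiv.fix with Scale == 4, values chosen for illustration):
// with LHS == 0x00F0 there are at least four leading zeroes of headroom, so
// LHS can be pre-shifted to 0x0F00 and divided in i16 without widening.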
11329 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11330 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11331 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11332
11333 // For signed saturating operations, we need to be able to detect true integer
11334 // division overflow; that is, when you have MIN / -EPS. However, this
11335 // is undefined behavior and if we emit divisions that could take such
11336 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11337 // example).
11338 // Avoid this by requiring an extra bit so that we never get this case.
11339 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11340 // signed saturating division, we need to emit a whopping 32-bit division.
11341 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11342 return SDValue();
11343
11344 unsigned LHSShift = std::min(LHSLead, Scale);
11345 unsigned RHSShift = Scale - LHSShift;
11346
11347 // At this point, we know that if we shift the LHS up by LHSShift and the
11348 // RHS down by RHSShift, we can emit a regular division with a final scaling
11349 // factor of Scale.
11350
11351 if (LHSShift)
11352 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11353 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11354 if (RHSShift)
11355 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11356 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11357
11358 SDValue Quot;
11359 if (Signed) {
11360 // For signed operations, if the resulting quotient is negative and the
11361 // remainder is nonzero, subtract 1 from the quotient to round towards
11362 // negative infinity.
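// E.g. -7 sdiv 2 truncates to -3 with remainder -1; since the remainder is
// nonzero and the quotient is negative, subtracting 1 gives floor(-3.5) = -4.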
11363 SDValue Rem;
11364 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11365 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11366 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11367 if (isTypeLegal(VT) &&
11368 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11369 Quot = DAG.getNode(ISD::SDIVREM, dl,
11370 DAG.getVTList(VT, VT),
11371 LHS, RHS);
11372 Rem = Quot.getValue(1);
11373 Quot = Quot.getValue(0);
11374 } else {
11375 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11376 LHS, RHS);
11377 Rem = DAG.getNode(ISD::SREM, dl, VT,
11378 LHS, RHS);
11379 }
11380 SDValue Zero = DAG.getConstant(0, dl, VT);
11381 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11382 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11383 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11384 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11385 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11386 DAG.getConstant(1, dl, VT));
11387 Quot = DAG.getSelect(dl, VT,
11388 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11389 Sub1, Quot);
11390 } else
11391 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11392 LHS, RHS);
11393
11394 return Quot;
11395}
11396
11397void TargetLowering::expandUADDSUBO(
11398 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11399 SDLoc dl(Node);
11400 SDValue LHS = Node->getOperand(0);
11401 SDValue RHS = Node->getOperand(1);
11402 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11403
11404 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11405 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11406 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11407 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11408 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11409 { LHS, RHS, CarryIn });
11410 Result = SDValue(NodeCarry.getNode(), 0);
11411 Overflow = SDValue(NodeCarry.getNode(), 1);
11412 return;
11413 }
11414
11415 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11416 LHS.getValueType(), LHS, RHS);
11417
11418 EVT ResultType = Node->getValueType(1);
11419 EVT SetCCType = getSetCCResultType(
11420 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11421 SDValue SetCC;
11422 if (IsAdd && isOneConstant(RHS)) {
11423 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
11424 // the live range of X. We assume comparing with 0 is cheap.
11425 // The general case (X + C) < C is not necessarily beneficial. Although we
11426 // reduce the live range of X, we may introduce the materialization of
11427 // constant C.
11428 SetCC =
11429 DAG.getSetCC(dl, SetCCType, Result,
11430 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11431 } else if (IsAdd && isAllOnesConstant(RHS)) {
11432 // Special case: uaddo X, -1 overflows if X != 0.
11433 SetCC =
11434 DAG.getSetCC(dl, SetCCType, LHS,
11435 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11436 } else {
11437 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11438 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11439 }
11440 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11441}
11442
11443void TargetLowering::expandSADDSUBO(
11444 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11445 SDLoc dl(Node);
11446 SDValue LHS = Node->getOperand(0);
11447 SDValue RHS = Node->getOperand(1);
11448 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11449
11450 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11451 LHS.getValueType(), LHS, RHS);
11452
11453 EVT ResultType = Node->getValueType(1);
11454 EVT OType = getSetCCResultType(
11455 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11456
11457 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11458 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11459 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11460 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11461 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11462 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11463 return;
11464 }
11465
11466 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11467
11468 // For an addition, the result should be less than one of the operands (LHS)
11469 // if and only if the other operand (RHS) is negative, otherwise there will
11470 // be overflow.
11471 // For a subtraction, the result should be less than one of the operands
11472 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11473 // otherwise there will be overflow.
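// A worked example (assuming i8, values chosen for illustration): 100 + 50
// wraps to -106, so Result < LHS holds while RHS is non-negative and the XOR
// below signals overflow; for 100 + (-50) == 50, Result < LHS holds and RHS
// is negative, so the two conditions cancel and no overflow is reported.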
11474 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11475 SDValue ConditionRHS =
11476 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11477
11478 Overflow = DAG.getBoolExtOrTrunc(
11479 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11480 ResultType, ResultType);
11481}
11482
11483bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11484 SDValue &Overflow, SelectionDAG &DAG) const {
11485 SDLoc dl(Node);
11486 EVT VT = Node->getValueType(0);
11487 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11488 SDValue LHS = Node->getOperand(0);
11489 SDValue RHS = Node->getOperand(1);
11490 bool isSigned = Node->getOpcode() == ISD::SMULO;
11491
11492 // For power-of-two multiplications we can use a simpler shift expansion.
11493 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11494 const APInt &C = RHSC->getAPIntValue();
11495 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11496 if (C.isPowerOf2()) {
11497 // smulo(x, signed_min) is same as umulo(x, signed_min).
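// E.g. (assuming i8, values chosen for illustration) smulo(x, 4) becomes
// x << 2 with overflow whenever shifting back disagrees: for x == 48,
// 48 << 2 wraps to -64 and -64 >> 2 == -16 != 48, so overflow is set, while
// 5 << 2 == 20 round-trips and reports no overflow.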
11498 bool UseArithShift = isSigned && !C.isMinSignedValue();
11499 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11500 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11501 Overflow = DAG.getSetCC(dl, SetCCVT,
11502 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11503 dl, VT, Result, ShiftAmt),
11504 LHS, ISD::SETNE);
11505 return true;
11506 }
11507 }
11508
11509 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11510 if (VT.isVector())
11511 WideVT =
11512 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11513
11514 SDValue BottomHalf;
11515 SDValue TopHalf;
11516 static const unsigned Ops[2][3] =
11517 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11518 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11519 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11520 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11521 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11522 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11523 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11524 RHS);
11525 TopHalf = BottomHalf.getValue(1);
11526 } else if (isTypeLegal(WideVT)) {
11527 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11528 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11529 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11530 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11531 SDValue ShiftAmt =
11532 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11533 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11534 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11535 } else {
11536 if (VT.isVector())
11537 return false;
11538
11539 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11540 }
11541
11542 Result = BottomHalf;
11543 if (isSigned) {
11544 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11545 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11546 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11547 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11548 } else {
11549 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11550 DAG.getConstant(0, dl, VT), ISD::SETNE);
11551 }
11552
11553 // Truncate the result if SetCC returns a larger type than needed.
11554 EVT RType = Node->getValueType(1);
11555 if (RType.bitsLT(Overflow.getValueType()))
11556 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11557
11558 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11559 "Unexpected result type for S/UMULO legalization");
11560 return true;
11561}
11562
11563SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11564 SDLoc dl(Node);
11565 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11566 SDValue Op = Node->getOperand(0);
11567 EVT VT = Op.getValueType();
11568
11569 // Try to use a shuffle reduction for power of two vectors.
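// E.g. a vecreduce_add of v8i32 is split into an ADD of two v4i32 halves,
// then v2i32, halving while the binop is legal on the half type and
// returning early once the reduction itself becomes legal; whatever remains
// falls through to the scalar expansion below.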
11570 if (VT.isPow2VectorType()) {
11571 while (VT.getVectorNumElements() > 1) {
11572 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11573 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11574 break;
11575
11576 SDValue Lo, Hi;
11577 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11578 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11579 VT = HalfVT;
11580
11581 // Stop if splitting is enough to make the reduction legal.
11582 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11583 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11584 Node->getFlags());
11585 }
11586 }
11587
11588 if (VT.isScalableVector())
11590 "Expanding reductions for scalable vectors is undefined.");
11591
11592 EVT EltVT = VT.getVectorElementType();
11593 unsigned NumElts = VT.getVectorNumElements();
11594
11595 SmallVector<SDValue, 8> Ops;
11596 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11597
11598 SDValue Res = Ops[0];
11599 for (unsigned i = 1; i < NumElts; i++)
11600 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11601
11602 // Result type may be wider than element type.
11603 if (EltVT != Node->getValueType(0))
11604 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11605 return Res;
11606}
11607
11609 SDLoc dl(Node);
11610 SDValue AccOp = Node->getOperand(0);
11611 SDValue VecOp = Node->getOperand(1);
11612 SDNodeFlags Flags = Node->getFlags();
11613
11614 EVT VT = VecOp.getValueType();
11615 EVT EltVT = VT.getVectorElementType();
11616
11617 if (VT.isScalableVector())
11619 "Expanding reductions for scalable vectors is undefined.");
11620
11621 unsigned NumElts = VT.getVectorNumElements();
11622
11623 SmallVector<SDValue, 8> Ops;
11624 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11625
11626 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11627
11628 SDValue Res = AccOp;
11629 for (unsigned i = 0; i < NumElts; i++)
11630 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11631
11632 return Res;
11633}
11634
11635bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11636 SelectionDAG &DAG) const {
11637 EVT VT = Node->getValueType(0);
11638 SDLoc dl(Node);
11639 bool isSigned = Node->getOpcode() == ISD::SREM;
11640 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11641 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11642 SDValue Dividend = Node->getOperand(0);
11643 SDValue Divisor = Node->getOperand(1);
11644 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11645 SDVTList VTs = DAG.getVTList(VT, VT);
11646 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11647 return true;
11648 }
11649 if (isOperationLegalOrCustom(DivOpc, VT)) {
11650 // X % Y -> X-X/Y*Y
11651 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11652 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11653 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11654 return true;
11655 }
11656 return false;
11657}
11658
11659SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11660 SelectionDAG &DAG) const {
11661 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11662 SDLoc dl(SDValue(Node, 0));
11663 SDValue Src = Node->getOperand(0);
11664
11665 // DstVT is the result type, while SatVT is the size to which we saturate
11666 EVT SrcVT = Src.getValueType();
11667 EVT DstVT = Node->getValueType(0);
11668
11669 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11670 unsigned SatWidth = SatVT.getScalarSizeInBits();
11671 unsigned DstWidth = DstVT.getScalarSizeInBits();
11672 assert(SatWidth <= DstWidth &&
11673 "Expected saturation width smaller than result width");
11674
11675 // Determine minimum and maximum integer values and their corresponding
11676 // floating-point values.
11677 APInt MinInt, MaxInt;
11678 if (IsSigned) {
11679 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11680 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11681 } else {
11682 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11683 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11684 }
11685
11686 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11687 // libcall emission cannot handle this. Large result types will fail.
11688 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11689 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11690 SrcVT = Src.getValueType();
11691 }
11692
11693 const fltSemantics &Sem = SrcVT.getFltSemantics();
11694 APFloat MinFloat(Sem);
11695 APFloat MaxFloat(Sem);
11696
11697 APFloat::opStatus MinStatus =
11698 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11699 APFloat::opStatus MaxStatus =
11700 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11701 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11702 !(MaxStatus & APFloat::opStatus::opInexact);
11703
11704 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11705 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11706
11707 // If the integer bounds are exactly representable as floats and min/max are
11708 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11709 // of comparisons and selects.
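// E.g. for fptosi.sat from f32 to i8 both bounds (-128.0 and 127.0) are
// exact in f32, so fmaxnum/fminnum clamp the input to [-128.0, 127.0] (NaN
// maps to -128.0) before converting; 300.5 thus clamps to 127.0 and converts
// to 127.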
11710 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11711 isOperationLegal(ISD::FMAXNUM, SrcVT);
11712 if (AreExactFloatBounds && MinMaxLegal) {
11713 SDValue Clamped = Src;
11714
11715 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11716 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11717 // Clamp by MaxFloat from above. NaN cannot occur.
11718 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11719 // Convert clamped value to integer.
11720 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11721 dl, DstVT, Clamped);
11722
11723 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11724 // which will cast to zero.
11725 if (!IsSigned)
11726 return FpToInt;
11727
11728 // Otherwise, select 0 if Src is NaN.
11729 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11730 EVT SetCCVT =
11731 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11732 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11733 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11734 }
11735
11736 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11737 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11738
11739 // Result of direct conversion. The assumption here is that the operation is
11740 // non-trapping and it's fine to apply it to an out-of-range value if we
11741 // select it away later.
11742 SDValue FpToInt =
11743 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11744
11745 SDValue Select = FpToInt;
11746
11747 EVT SetCCVT =
11748 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11749
11750 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11751 // MinInt if Src is NaN.
11752 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11753 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11754 // If Src OGT MaxFloat, select MaxInt.
11755 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11756 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11757
11758 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11759 // is already zero.
11760 if (!IsSigned)
11761 return Select;
11762
11763 // Otherwise, select 0 if Src is NaN.
11764 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11765 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11766 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11767}
11768
11769SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11770 const SDLoc &dl,
11771 SelectionDAG &DAG) const {
11772 EVT OperandVT = Op.getValueType();
11773 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11774 return Op;
11775 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11776 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11777 // can induce double-rounding which may alter the results. We can
11778 // correct for this using a trick explained in: Boldo, Sylvie, and
11779 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11780 // World Congress. 2005.
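// Intuitively, rounding the intermediate result to odd prevents double
// rounding: whenever the first narrowing is inexact its low bit is forced to
// 1, acting as a sticky bit, so a later round-to-nearest-even to the final
// type can never land on a tie manufactured by the first rounding.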
11781 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11782 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11783
11784 // We can keep the narrow value as-is if narrowing was exact (no
11785 // rounding error), the wide value was NaN (the narrow value is also
11786 // NaN and should be preserved) or if we rounded to the odd value.
11787 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11788 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11789 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11790 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11791 EVT ResultIntVTCCVT = getSetCCResultType(
11792 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11793 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11794 // The result is already odd so we don't need to do anything.
11795 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11796
11797 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11798 Op.getValueType());
11799 // We keep results which are exact, odd or NaN.
11800 SDValue KeepNarrow =
11801 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11802 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11803 // We morally performed a round-down if AbsNarrowAsWide is smaller than
11804 // AbsWide.
11805 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11806 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11807 SDValue NarrowIsRd =
11808 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11809 // If the narrow value is odd or exact, pick it.
11810 // Otherwise, narrow is even and corresponds to either the rounded-up
11811 // or rounded-down value. If narrow is the rounded-down value, we want
11812 // the rounded-up value as it will be odd.
11813 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11814 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11815 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11816 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11817}
11818
11819SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
11820 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11821 SDValue Op = Node->getOperand(0);
11822 EVT VT = Node->getValueType(0);
11823 SDLoc dl(Node);
11824 if (VT.getScalarType() == MVT::bf16) {
11825 if (Node->getConstantOperandVal(1) == 1) {
11826 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11827 }
11828 EVT OperandVT = Op.getValueType();
11829 SDValue IsNaN = DAG.getSetCC(
11830 dl,
11831 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11832 Op, Op, ISD::SETUO);
11833
11834 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11835 // can induce double-rounding which may alter the results. We can
11836 // correct for this using a trick explained in: Boldo, Sylvie, and
11837 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11838 // World Congress. 2005.
11839 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11840 EVT I32 = F32.changeTypeToInteger();
11841 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11842 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11843
11844 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11845 // turning into infinities.
11846 SDValue NaN =
11847 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11848
11849 // Factor in the contribution of the low 16 bits.
11850 SDValue One = DAG.getConstant(1, dl, I32);
11851 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11852 DAG.getShiftAmountConstant(16, I32, dl));
11853 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11854 SDValue RoundingBias =
11855 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11856 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
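// This is the usual round-to-nearest-even bias (a sketch, assuming scalar
// f32 bits H:L with H the high 16 bits): if L > 0x8000 the bias carries into
// H (round up); if L < 0x8000 it does not (round down); and on an exact tie
// L == 0x8000 the carry occurs only when H is odd, rounding to the even
// neighbour.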
11857
11858 // Don't round if we had a NaN; we don't want to turn 0x7fffffff into
11859 // 0x80000000.
11860 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11861
11862 // Now that we have rounded, shift the bits into position.
11863 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11864 DAG.getShiftAmountConstant(16, I32, dl));
11865 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11866 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11867 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11868 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11869 }
11870 return SDValue();
11871}
11872
11873SDValue TargetLowering::expandVectorSplice(SDNode *Node,
11874 SelectionDAG &DAG) const {
11875 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11876 assert(Node->getValueType(0).isScalableVector() &&
11877 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11878
11879 EVT VT = Node->getValueType(0);
11880 SDValue V1 = Node->getOperand(0);
11881 SDValue V2 = Node->getOperand(1);
11882 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11883 SDLoc DL(Node);
11884
11885 // Expand through memory thusly:
11886 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11887 // Store V1, Ptr
11888 // Store V2, Ptr + sizeof(V1)
11889 // If (Imm < 0)
11890 // TrailingElts = -Imm
11891 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11892 // else
11893 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11894 // Res = Load Ptr
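// E.g. splicing two nxv4i32 vectors with Imm == 1 loads the result from
// Ptr + 4 bytes (one element into V1:V2), while Imm == -2 starts the load two
// elements before the end of V1, yielding the last two elements of V1
// followed by the leading elements of V2.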
11895
11896 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11897
11898 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11899 VT.getVectorElementCount() * 2);
11900 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11901 EVT PtrVT = StackPtr.getValueType();
11902 auto &MF = DAG.getMachineFunction();
11903 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11904 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11905
11906 // Store the lo part of CONCAT_VECTORS(V1, V2)
11907 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11908 // Store the hi part of CONCAT_VECTORS(V1, V2)
11909 SDValue OffsetToV2 = DAG.getVScale(
11910 DL, PtrVT,
11911 APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
11912 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11913 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11914
11915 if (Imm >= 0) {
11916 // Load back the required element. getVectorElementPointer takes care of
11917 // clamping the index if it's out-of-bounds.
11918 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11919 // Load the spliced result
11920 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11921 MachinePointerInfo::getUnknownStack(MF));
11922 }
11923
11924 uint64_t TrailingElts = -Imm;
11925
11926 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11927 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11928 SDValue TrailingBytes =
11929 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11930
11931 if (TrailingElts > VT.getVectorMinNumElements()) {
11932 SDValue VLBytes =
11933 DAG.getVScale(DL, PtrVT,
11934 APInt(PtrVT.getFixedSizeInBits(),
11935 VT.getStoreSize().getKnownMinValue()));
11936 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11937 }
11938
11939 // Calculate the start address of the spliced result.
11940 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11941
11942 // Load the spliced result
11943 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11944 MachinePointerInfo::getUnknownStack(MF));
11945}
11946
11947SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
11948 SelectionDAG &DAG) const {
11949 SDLoc DL(Node);
11950 SDValue Vec = Node->getOperand(0);
11951 SDValue Mask = Node->getOperand(1);
11952 SDValue Passthru = Node->getOperand(2);
11953
11954 EVT VecVT = Vec.getValueType();
11955 EVT ScalarVT = VecVT.getScalarType();
11956 EVT MaskVT = Mask.getValueType();
11957 EVT MaskScalarVT = MaskVT.getScalarType();
11958
11959 // Needs to be handled by targets that have scalable vector types.
11960 if (VecVT.isScalableVector())
11961 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11962
11963 SDValue StackPtr = DAG.CreateStackTemporary(
11964 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11965 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11966 MachinePointerInfo PtrInfo =
11967 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
11968
11969 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11970 SDValue Chain = DAG.getEntryNode();
11971 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11972
11973 bool HasPassthru = !Passthru.isUndef();
11974
11975 // If we have a passthru vector, store it on the stack, overwrite the matching
11976 // positions and then re-write the last element that was potentially
11977 // overwritten even though mask[i] = false.
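// E.g. Vec = [a,b,c,d] with Mask = [1,0,1,0] and Passthru = [p,p,p,p]
// compresses to [a,c,p,p]: each selected element is stored at OutPos, which
// advances only on selected lanes, and the passthru store below pre-fills
// the tail.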
11978 if (HasPassthru)
11979 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11980
11981 SDValue LastWriteVal;
11982 APInt PassthruSplatVal;
11983 bool IsSplatPassthru =
11984 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11985
11986 if (IsSplatPassthru) {
11987 // As we do not know which position we wrote to last, we cannot simply
11988 // access that index from the passthru vector. So we first check if passthru
11989 // is a splat vector, to use any element ...
11990 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11991 } else if (HasPassthru) {
11992 // ... if it is not a splat vector, we need to get the passthru value at
11993 // position = popcount(mask) and re-load it from the stack before it is
11994 // overwritten in the loop below.
11995 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11996 SDValue Popcount = DAG.getNode(
11997 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11998 Popcount =
11999 DAG.getNode(ISD::ZERO_EXTEND, DL,
12000 MaskVT.changeVectorElementType(PopcountVT), Popcount);
12001 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12002 SDValue LastElmtPtr =
12003 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12004 LastWriteVal = DAG.getLoad(
12005 ScalarVT, DL, Chain, LastElmtPtr,
12006 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12007 Chain = LastWriteVal.getValue(1);
12008 }
12009
12010 unsigned NumElms = VecVT.getVectorNumElements();
12011 for (unsigned I = 0; I < NumElms; I++) {
12012 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12013 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12014 Chain = DAG.getStore(
12015 Chain, DL, ValI, OutPtr,
12016 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12017
12018 // Get the mask value and add it to the current output position. This
12019 // either increments by 1 if MaskI is true or adds 0 otherwise.
12020 // Freeze in case we have poison/undef mask entries.
12021 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12022 MaskI = DAG.getFreeze(MaskI);
12023 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12024 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12025 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12026
12027 if (HasPassthru && I == NumElms - 1) {
12028 SDValue EndOfVector =
12029 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12030 SDValue AllLanesSelected =
12031 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12032 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12033 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12034
12035 // Re-write the last ValI if all lanes were selected. Otherwise,
12036 // overwrite the last write with the passthru value.
12037 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12038 LastWriteVal, SDNodeFlags::Unpredictable);
12039 Chain = DAG.getStore(
12040 Chain, DL, LastWriteVal, OutPtr,
12041 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12042 }
12043 }
12044
12045 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12046}
12047
12048SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12049 SelectionDAG &DAG) const {
12050 SDLoc DL(N);
12051 SDValue Acc = N->getOperand(0);
12052 SDValue MulLHS = N->getOperand(1);
12053 SDValue MulRHS = N->getOperand(2);
12054 EVT AccVT = Acc.getValueType();
12055 EVT MulOpVT = MulLHS.getValueType();
12056
12057 EVT ExtMulOpVT =
12058 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12059 MulOpVT.getVectorElementCount());
12060
12061 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12062 ? ISD::ZERO_EXTEND
12063 : ISD::SIGN_EXTEND;
12064 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12065 ? ISD::SIGN_EXTEND
12066 : ISD::ZERO_EXTEND;
12067
12068 if (ExtMulOpVT != MulOpVT) {
12069 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12070 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12071 }
12072 SDValue Input = MulLHS;
12073 APInt ConstantOne;
12074 if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
12075 !ConstantOne.isOne())
12076 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12077
12078 unsigned Stride = AccVT.getVectorMinNumElements();
12079 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
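// E.g. partially reducing v16i8 multiplicands into a v4i32 accumulator:
// the product is extended to v16i32, Stride == 4 and ScaleFactor == 4, and
// the result is Acc plus the four v4i32 subvectors of the extended product,
// summed pairwise by the loop below.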
12080
12081 // Collect all of the subvectors
12082 std::deque<SDValue> Subvectors = {Acc};
12083 for (unsigned I = 0; I < ScaleFactor; I++)
12084 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12085
12086 // Flatten the subvector tree
12087 while (Subvectors.size() > 1) {
12088 Subvectors.push_back(
12089 DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12090 Subvectors.pop_front();
12091 Subvectors.pop_front();
12092 }
12093
12094 assert(Subvectors.size() == 1 &&
12095 "There should only be one subvector after tree flattening");
12096
12097 return Subvectors[0];
12098}
12099
12100bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12101 SDValue &LHS, SDValue &RHS,
12102 SDValue &CC, SDValue Mask,
12103 SDValue EVL, bool &NeedInvert,
12104 const SDLoc &dl, SDValue &Chain,
12105 bool IsSignaling) const {
12106 MVT OpVT = LHS.getSimpleValueType();
12107 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12108 NeedInvert = false;
12109 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12110 bool IsNonVP = !EVL;
12111 switch (getCondCodeAction(CCCode, OpVT)) {
12112 default:
12113 llvm_unreachable("Unknown condition code action!");
12114 case TargetLowering::Legal:
12115 // Nothing to do.
12116 break;
12117 case TargetLowering::Expand: {
12118 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12119 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12120 std::swap(LHS, RHS);
12121 CC = DAG.getCondCode(InvCC);
12122 return true;
12123 }
12124 // Swapping operands didn't work. Try inverting the condition.
12125 bool NeedSwap = false;
12126 InvCC = getSetCCInverse(CCCode, OpVT);
12127 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12128 // If inverting the condition is not enough, try swapping operands
12129 // on top of it.
12130 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12131 NeedSwap = true;
12132 }
12133 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12134 CC = DAG.getCondCode(InvCC);
12135 NeedInvert = true;
12136 if (NeedSwap)
12137 std::swap(LHS, RHS);
12138 return true;
12139 }
12140
12141 // Special case: expand i1 comparisons using logical operations.
12142 if (OpVT == MVT::i1) {
12143 SDValue Ret;
12144 switch (CCCode) {
12145 default:
12146 llvm_unreachable("Unknown integer setcc!");
12147 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12148 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12149 MVT::i1);
12150 break;
12151 case ISD::SETNE: // X != Y --> (X ^ Y)
12152 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12153 break;
12154 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12155 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12156 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12157 DAG.getNOT(dl, LHS, MVT::i1));
12158 break;
12159 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12160 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12161 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12162 DAG.getNOT(dl, RHS, MVT::i1));
12163 break;
12164 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12165 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12166 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12167 DAG.getNOT(dl, LHS, MVT::i1));
12168 break;
12169 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12170 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12171 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12172 DAG.getNOT(dl, RHS, MVT::i1));
12173 break;
12174 }
12175
12176 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12177 RHS = SDValue();
12178 CC = SDValue();
12179 return true;
12180 }
12181
12182 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12183 unsigned Opc = 0;
12184 switch (CCCode) {
12185 default:
12186 llvm_unreachable("Don't know how to expand this condition!");
12187 case ISD::SETUO:
12188 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12189 CC1 = ISD::SETUNE;
12190 CC2 = ISD::SETUNE;
12191 Opc = ISD::OR;
12192 break;
12193 }
12195 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12196 NeedInvert = true;
12197 [[fallthrough]];
12198 case ISD::SETO:
12200 "If SETO is expanded, SETOEQ must be legal!");
12201 CC1 = ISD::SETOEQ;
12202 CC2 = ISD::SETOEQ;
12203 Opc = ISD::AND;
12204 break;
12205 case ISD::SETONE:
12206 case ISD::SETUEQ:
12207 // If the SETUO or SETO CC isn't legal, we might be able to use
12208 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12209 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12210 // the operands.
12211 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12212 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12213 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12214 CC1 = ISD::SETOGT;
12215 CC2 = ISD::SETOLT;
12216 Opc = ISD::OR;
12217 NeedInvert = ((unsigned)CCCode & 0x8U);
12218 break;
12219 }
12220 [[fallthrough]];
12221 case ISD::SETOEQ:
12222 case ISD::SETOGT:
12223 case ISD::SETOGE:
12224 case ISD::SETOLT:
12225 case ISD::SETOLE:
12226 case ISD::SETUNE:
12227 case ISD::SETUGT:
12228 case ISD::SETUGE:
12229 case ISD::SETULT:
12230 case ISD::SETULE:
12231 // If we are floating point, assign and break, otherwise fall through.
12232 if (!OpVT.isInteger()) {
12233 // We can use the 4th bit to tell if we are the unordered
12234 // or ordered version of the opcode.
12235 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12236 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12237 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12238 break;
12239 }
12240 // Fall through if we are an unsigned integer.
12241 [[fallthrough]];
12242 case ISD::SETLE:
12243 case ISD::SETGT:
12244 case ISD::SETGE:
12245 case ISD::SETLT:
12246 case ISD::SETNE:
12247 case ISD::SETEQ:
12248 // If all combinations of inverting the condition and swapping operands
12249 // didn't work then we have no means to expand the condition.
12250 llvm_unreachable("Don't know how to expand this condition!");
12251 }
12252
12253 SDValue SetCC1, SetCC2;
12254 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12255 // If we aren't the ordered or unordered operation,
12256 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12257 if (IsNonVP) {
12258 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12259 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12260 } else {
12261 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12262 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12263 }
12264 } else {
12265 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12266 if (IsNonVP) {
12267 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12268 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12269 } else {
12270 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12271 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12272 }
12273 }
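// Note (editorial, not in the source): the self-compare pattern works
// because "x SETOEQ x" is true exactly when x is not NaN, and "x SETUNE x"
// is true exactly when x is NaN. ANDing the two SETOEQ self-compares
// therefore implements SETO (both operands ordered), while ORing the two
// SETUNE self-compares implements SETUO (either operand is NaN).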
12274 if (Chain)
12275 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12276 SetCC2.getValue(1));
12277 if (IsNonVP)
12278 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12279 else {
12280 // Transform the binary opcode to the VP equivalent.
12281 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12282 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12283 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12284 }
12285 RHS = SDValue();
12286 CC = SDValue();
12287 return true;
12288 }
12289 }
12290 return false;
12291}
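// Illustrative example (editorial sketch with assumed f32 operands, not part
// of this file): on a target where SETUGT is illegal but SETOGT and SETUO
// are legal, the expansion above picks CC1 = SETOGT, CC2 = SETUO and
// Opc = ISD::OR, i.e. roughly:
//   SDValue OGT = DAG.getSetCC(dl, VT, LHS, RHS, ISD::SETOGT);
//   SDValue UO  = DAG.getSetCC(dl, VT, LHS, RHS, ISD::SETUO);
//   LHS = DAG.getNode(ISD::OR, dl, VT, OGT, UO);
// "x >u y" holds exactly when "x >o y" holds or either operand is NaN.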
12292
12293SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12294 SelectionDAG &DAG) const {
12295 EVT VT = Node->getValueType(0);
12296 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12297 // split into two equal parts.
12298 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12299 return SDValue();
12300
12301 // Restrict expansion to cases where both parts can be concatenated.
12302 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12303 if (LoVT != HiVT || !isTypeLegal(LoVT))
12304 return SDValue();
12305
12306 SDLoc DL(Node);
12307 unsigned Opcode = Node->getOpcode();
12308
12309 // Don't expand if the result is likely to be unrolled anyway.
12310 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12311 return SDValue();
12312
12313 SmallVector<SDValue, 4> LoOps, HiOps;
12314 for (const SDValue &V : Node->op_values()) {
12315 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12316 LoOps.push_back(Lo);
12317 HiOps.push_back(Hi);
12318 }
12319
12320 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12321 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12322 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12323}
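// Worked example (editorial sketch with assumed types, not in the source):
// if v8i16 ADD is illegal but v4i16 ADD is legal, the routine above rewrites
//   (add v8i16 %a, %b)
// as
//   (concat_vectors (add v4i16 %a.lo, %b.lo), (add v4i16 %a.hi, %b.hi))
// where each lo/hi pair is produced by DAG.SplitVector on an operand.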
12324
12325SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12326 const SDLoc &DL,
12327 EVT InVecVT, SDValue EltNo,
12328 LoadSDNode *OriginalLoad,
12329 SelectionDAG &DAG) const {
12330 assert(OriginalLoad->isSimple());
12331
12332 EVT VecEltVT = InVecVT.getVectorElementType();
12333
12334 // If the vector element type is not a multiple of a byte then we are unable
12335 // to correctly compute an address to load only the extracted element as a
12336 // scalar.
12337 if (!VecEltVT.isByteSized())
12338 return SDValue();
12339
12340 ISD::LoadExtType ExtTy =
12341 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12342 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12343 return SDValue();
12344
12345 std::optional<unsigned> ByteOffset;
12346 Align Alignment = OriginalLoad->getAlign();
12347 MachinePointerInfo MPI;
12348 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12349 int Elt = ConstEltNo->getZExtValue();
12350 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12351 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12352 Alignment = commonAlignment(Alignment, *ByteOffset);
12353 } else {
12354 // Discard the pointer info except the address space because the memory
12355 // operand can't represent this new access since the offset is variable.
12356 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12357 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12358 }
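// Note (editorial, not in the source): commonAlignment clamps the original
// alignment to what the byte offset still guarantees. For example, a
// 16-byte-aligned v4i32 load with constant element index 2 yields
// ByteOffset = 8, so commonAlignment(Align(16), 8) = Align(8) below.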
12359
12360 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12361 return SDValue();
12362
12363 unsigned IsFast = 0;
12364 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12365 OriginalLoad->getAddressSpace(), Alignment,
12366 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12367 !IsFast)
12368 return SDValue();
12369
12370 SDValue NewPtr =
12371 getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12372
12373 // We are replacing a vector load with a scalar load. The new load must have
12374 // identical memory op ordering to the original.
12375 SDValue Load;
12376 if (ResultVT.bitsGT(VecEltVT)) {
12377 // If the result type of vextract is wider than the load, then issue an
12378 // extending load instead.
12379 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12380 ? ISD::ZEXTLOAD
12381 : ISD::EXTLOAD;
12382 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12383 NewPtr, MPI, VecEltVT, Alignment,
12384 OriginalLoad->getMemOperand()->getFlags(),
12385 OriginalLoad->getAAInfo());
12386 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12387 } else {
12388 // The result type is narrower than or the same width as the vector element.
12389 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12390 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12391 OriginalLoad->getAAInfo());
12392 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12393 if (ResultVT.bitsLT(VecEltVT))
12394 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12395 else
12396 Load = DAG.getBitcast(ResultVT, Load);
12397 }
12398
12399 return Load;
12400}
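// End-to-end example (editorial sketch with assumed types, not part of this
// file): given a simple 16-byte-aligned load %v = load v4i32 from %p, the
// node (extract_vector_elt %v, 2) can be rewritten by the routine above as
// a scalar i32 load from %p + 8 with Align(8), reusing the original chain,
// AA info and memory-operand flags.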
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1158
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
void changeSign()
Definition APFloat.h:1297
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
void negate()
Negate this APInt in place.
Definition APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1435
unsigned logBase2() const
Definition APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
void setAllBits()
Set every bit to 1.
Definition APInt.h:1319
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1417
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1442
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
iterator end() const
Definition StringRef.h:122
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
TargetLoweringBase(const TargetMachine &TM)
NOTE: The TargetMachine owns TLOF.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
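Without a native byte swap the node lowers to shifts and masks; the 32-bit case in C++ (bswap32 is an illustrative name):
#include <cstdint>
// Move each byte of V to its mirrored position.
uint32_t bswap32(uint32_t V) {
  return (V << 24) | ((V & 0xFF00u) << 8) |
         ((V >> 8) & 0xFF00u) | (V >> 24);
}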
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and register class for the register.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: only the DemandedBits bits of its result are ever used, so attempt to simplify Op accordingly.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT, forcing the result's low bit to 1 if the truncation is inexact (round-to-odd).
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
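A funnel shift takes the top (or bottom) word of a double-wide shift of a concatenated pair, so absent a native node it reduces to two shifts and an OR, with the zero-amount case split out because a full-width shift is undefined. A 32-bit FSHL sketch in C++ (fshl32 is an illustrative name):
#include <cstdint>
// fshl(Hi, Lo, S): high 32 bits of (Hi:Lo) << (S % 32).
uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  S &= 31;
  return S ? (Hi << S) | (Lo >> (32 - S)) : Hi; // S == 0: Lo >> 32 is UB
}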
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a true value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
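Rotations expand to the familiar two-shift OR; masking the complementary shift amount keeps the zero-amount case well defined. A 32-bit rotate-left sketch in C++ (rotl32 is an illustrative name):
#include <cstdint>
// (32 - S) & 31 is the complementary amount mod 32, so S == 0 yields
// V | V rather than the undefined V >> 32.
uint32_t rotl32(uint32_t V, uint32_t S) {
  return (V << (S & 31)) | (V >> ((32 - S) & 31));
}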
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
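Shift-by-parts implements a double-wide shift on a Lo/Hi register pair, branching on whether the amount crosses the part boundary. A C++ sketch of SHL_PARTS for a 64-bit value held as two 32-bit parts (shlParts is an illustrative name; Amt is assumed below 64):
#include <cstdint>
void shlParts(uint32_t &Lo, uint32_t &Hi, uint32_t Amt) {
  if (Amt == 0)
    return;                                // avoid the undefined Lo >> 32
  if (Amt < 32) {
    Hi = (Hi << Amt) | (Lo >> (32 - Amt)); // bits spill from Lo into Hi
    Lo <<= Amt;
  } else {
    Hi = Lo << (Amt - 32);                 // the whole low part moves up
    Lo = 0;
  }
}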
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, return true if Op is known never to be any NaN; if SNaN is true, return whether Op is known never to be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
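One standard strategy (not necessarily the exact DAG sequence this hook emits) converts the two 32-bit halves separately and recombines them; every step except the final add is exact, so the result is correctly rounded. C++ sketch (uitofp64 is an illustrative name):
#include <cstdint>
double uitofp64(uint64_t U) {
  double HiD = (double)(uint32_t)(U >> 32); // < 2^32: converts exactly
  double LoD = (double)(uint32_t)U;         // converts exactly
  return HiD * 4294967296.0 + LoD;          // HiD * 2^32 exact; one rounding
}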
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
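The unsigned averaging nodes have well-known overflow-free expansions: shared bits plus half the differing bits (floor), or the union minus half the differing bits (ceil). C++ sketches (the function names are illustrative):
#include <cstdint>
uint32_t avgFloorU(uint32_t A, uint32_t B) { // AVGFLOORU
  return (A & B) + ((A ^ B) >> 1);
}
uint32_t avgCeilU(uint32_t A, uint32_t B) {  // AVGCEILU
  return (A | B) - ((A ^ B) >> 1);
}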
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ SSUBO
Same as SADDO, but for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same as SADDO/SSUBO, but for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a constant splat vector (all elements the same constant), setting SplatValue to the splatted constant.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match only the specific value V.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2038
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. the inverted test is cheaper to lower).
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew mod Align.
Definition MathExtras.h:557
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1743
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1569
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:255
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
KnownBits byteSwap() const
Definition KnownBits.h:514
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:289
KnownBits reverseBits() const
Definition KnownBits.h:518
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:233
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:321
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:167
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:286
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...