LLVM 22.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
46
47// Define the virtual destructor out-of-line for build efficiency.
49
50const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
51 return nullptr;
52}
53
57
 58/// Check whether a given call node is in tail position within its function. If
 59/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the defining signature line is elided in this extraction; the
// body reads the enclosing IR function F and the call node Node.
 61                                          SDValue &Chain) const {
 63
 64  // First, check if tail calls have been disabled in this function.
 65  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
 66    return false;
 67
 68  // Conservatively require the attributes of the call to match those of
 69  // the return. Ignore following attributes because they don't affect the
 70  // call sequence.
 71  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
 72  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
 73                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
 74                           Attribute::NonNull, Attribute::NoUndef,
 75                           Attribute::Range, Attribute::NoFPClass})
 76    CallerAttrs.removeAttribute(Attr);
 77
 78  if (CallerAttrs.hasAttributes())
 79    return false;
 80
 81  // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): this check can never fire — any surviving return attribute
// (ZExt/SExt included) already triggered the early return just above.
 82  if (CallerAttrs.contains(Attribute::ZExt) ||
 83      CallerAttrs.contains(Attribute::SExt))
 84    return false;
 85
 86  // Check if the only use is a function return node.
 87  return isUsedByReturnOnly(Node, Chain);
 88}
89
// Return true if every outgoing argument assigned to a callee-saved register
// is the caller's own live-in value for that same register, i.e. the
// register's value is passed straight through to the callee unchanged.
// (Signature line is elided in this extraction.)
 91                                           const uint32_t *CallerPreservedMask,
 92    const SmallVectorImpl<CCValAssign> &ArgLocs,
 93    const SmallVectorImpl<SDValue> &OutVals) const {
 94  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
 95    const CCValAssign &ArgLoc = ArgLocs[I];
 96    if (!ArgLoc.isRegLoc())
 97      continue;
 98    MCRegister Reg = ArgLoc.getLocReg();
 99    // Only look at callee saved registers.
100    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
101      continue;
102    // Check that we pass the value used for the caller.
103    // (We look for a CopyFromReg reading a virtual register that is used
104    //  for the function live-in value of register Reg)
105    SDValue Value = OutVals[I];
    // An AssertZext wrapper does not change the underlying value; look through.
106    if (Value->getOpcode() == ISD::AssertZext)
107      Value = Value.getOperand(0);
108    if (Value->getOpcode() != ISD::CopyFromReg)
109      return false;
110    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
111    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
112      return false;
113  }
114  return true;
115}
116
117/// Set CallLoweringInfo attribute flags based on a call instruction
118/// and called function attributes.
// (The defining signature line, taking the CallBase and the argument index,
// is elided in this extraction.)
120                                             unsigned ArgIdx) {
121  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
122  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
123  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
124  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
125  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
126  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
127  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
128  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
129  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
130  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
131  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
132  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
133  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
134  Alignment = Call->getParamStackAlign(ArgIdx);
  // Each pointee-carrying ABI attribute below fills in IndirectType; the
  // (partially elided) assert checks that at most one of them is present.
135  IndirectType = nullptr;
137         "multiple ABI attributes?");
138  if (IsByVal) {
139    IndirectType = Call->getParamByValType(ArgIdx);
    // Stack alignment, when given, wins; otherwise fall back to param align.
140    if (!Alignment)
141      Alignment = Call->getParamAlign(ArgIdx);
142  }
143  if (IsPreallocated)
144    IndirectType = Call->getParamPreallocatedType(ArgIdx);
145  if (IsInAlloca)
146    IndirectType = Call->getParamInAllocaType(ArgIdx);
147  if (IsSRet)
148    IndirectType = Call->getParamStructRetType(ArgIdx);
149}
150
151/// Generate a libcall taking the given operands as arguments and returning a
152/// result of type RetVT.
153std::pair<SDValue, SDValue>
154TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
156                            MakeLibCallOptions CallOptions,
157                            const SDLoc &dl,
158                            SDValue InChain) const {
  // A null incoming chain means the call is not ordered against anything;
  // anchor it at the DAG entry node.
159  if (!InChain)
160    InChain = DAG.getEntryNode();
161
163  Args.reserve(Ops.size());
164
  // Build one ArgListEntry per operand, honouring any per-operand IR type
  // override supplied by the caller.
165  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
166  for (unsigned i = 0; i < Ops.size(); ++i) {
167    SDValue NewOp = Ops[i];
168    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
169                   ? OpsTypeOverrides[i]
170                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
171    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    // When soft-float legalization created this call, remember the operand's
    // original (pre-soften) type for ABI purposes.
172    if (CallOptions.IsSoften)
173      Entry.OrigTy =
174          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
175
176    Entry.IsSExt =
177        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
178    Entry.IsZExt = !Entry.IsSExt;
179
180    if (CallOptions.IsSoften &&
182      Entry.IsSExt = Entry.IsZExt = false;
183    }
184    Args.push_back(Entry);
185  }
186
187  const char *LibcallName = getLibcallName(LC);
188  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
189    reportFatalInternalError("unsupported library call operation");
190
191  SDValue Callee =
192      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
193
194  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
195  Type *OrigRetTy = RetTy;
197  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
198  bool zeroExtend = !signExtend;
199
  // For softened calls the result extension is decided elsewhere; record the
  // pre-soften return type and suppress both extensions here.
200  if (CallOptions.IsSoften) {
201    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
203      signExtend = zeroExtend = false;
204  }
205
206  CLI.setDebugLoc(dl)
207      .setChain(InChain)
208      .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
209                    std::move(Args))
210      .setNoReturn(CallOptions.DoesNotReturn)
213      .setSExtResult(signExtend)
214      .setZExtResult(zeroExtend);
  // Returns {result value, output chain}.
215  return LowerCallTo(CLI);
216}
217
// Plan the sequence of load/store value types (appended to MemOps) used to
// lower a memory operation described by Op, subject to a count Limit.
// Returns false if no acceptable plan exists within the limit.
// (Signature line is elided in this extraction.)
219    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
220    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
221    const AttributeList &FuncAttributes) const {
  // With a real limit, refuse a memcpy whose source is less aligned than its
  // fixed destination alignment.
222  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
223      Op.getSrcAlign() < Op.getDstAlign())
224    return false;
225
226  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
227
228  if (VT == MVT::Other) {
229    // Use the largest integer type whose alignment constraints are satisfied.
230    // We only need to check DstAlign here as SrcAlign is always greater or
231    // equal to DstAlign (or zero).
232    VT = MVT::LAST_INTEGER_VALUETYPE;
233    if (Op.isFixedDstAlign())
234      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
235             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
237    assert(VT.isInteger());
238
239    // Find the largest legal integer type.
240    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
241    while (!isTypeLegal(LVT))
242      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
243    assert(LVT.isInteger());
244
245    // If the type we've chosen is larger than the largest legal integer type
246    // then use that instead.
247    if (VT.bitsGT(LVT))
248      VT = LVT;
249  }
250
  // Greedily emit the widest usable type until the remaining size is covered.
251  unsigned NumMemOps = 0;
252  uint64_t Size = Op.size();
253  while (Size) {
254    unsigned VTSize = VT.getSizeInBits() / 8;
255    while (VTSize > Size) {
256      // For now, only use non-vector load / store's for the left-over pieces.
257      EVT NewVT = VT;
258      unsigned NewVTSize;
259
260      bool Found = false;
261      if (VT.isVector() || VT.isFloatingPoint()) {
262        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
263        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
265          Found = true;
266        else if (NewVT == MVT::i64 &&
267                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
268                 isSafeMemOpType(MVT::f64)) {
269          // i64 is usually not legal on 32-bit targets, but f64 may be.
270          NewVT = MVT::f64;
271          Found = true;
272        }
273      }
274
      // Otherwise step down through the integer types until one is safe,
      // bottoming out at i8.
275      if (!Found) {
276        do {
277          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
278          if (NewVT == MVT::i8)
279            break;
280        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
281      }
282      NewVTSize = NewVT.getSizeInBits() / 8;
283
284      // If the new VT cannot cover all of the remaining bits, then consider
285      // issuing a (or a pair of) unaligned and overlapping load / store.
286      unsigned Fast;
287      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
289              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
291          Fast)
292        VTSize = Size;
293      else {
294        VT = NewVT;
295        VTSize = NewVTSize;
296      }
297    }
298
299    if (++NumMemOps > Limit)
300      return false;
301
302    MemOps.push_back(VT);
303    Size -= VTSize;
304  }
305
306  return true;
307}
308
309/// Soften the operands of a comparison. This code is shared among BR_CC,
310/// SELECT_CC, and SETCC handlers.
// Convenience overload: forwards to the chained variant below with a null
// Chain, i.e. a non-strict (unchained) FP comparison.
312                                         SDValue &NewLHS, SDValue &NewRHS,
313                                         ISD::CondCode &CCCode,
314                                         const SDLoc &dl, const SDValue OldLHS,
315                                         const SDValue OldRHS) const {
316  SDValue Chain;
317  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
318                             OldRHS, Chain);
319}
320
// Chained variant: rewrites an FP comparison on a soft-float type into one or
// two comparison libcalls, updating NewLHS/NewRHS/CCCode to an integer setcc
// against the libcall result(s). Chain, if present, orders the calls.
// (Signature line is elided in this extraction.)
322                                         SDValue &NewLHS, SDValue &NewRHS,
323                                         ISD::CondCode &CCCode,
324                                         const SDLoc &dl, const SDValue OldLHS,
325                                         const SDValue OldRHS,
326                                         SDValue &Chain,
327                                         bool IsSignaling) const {
328  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
329  // not supporting it. We can update this code when libgcc provides such
330  // functions.
331
332  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
333         && "Unsupported setcc type!");
334
335  // Expand into one or more soft-fp libcall(s).
  // LC1 is always used; LC2 is only set for predicates that need two calls
  // combined (e.g. SETONE = SETO && SETUNE).
336  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
337  bool ShouldInvertCC = false;
338  switch (CCCode) {
339  case ISD::SETEQ:
340  case ISD::SETOEQ:
341    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
342          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
343          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
344    break;
345  case ISD::SETNE:
346  case ISD::SETUNE:
347    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
348          (VT == MVT::f64) ? RTLIB::UNE_F64 :
349          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
350    break;
351  case ISD::SETGE:
352  case ISD::SETOGE:
353    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
354          (VT == MVT::f64) ? RTLIB::OGE_F64 :
355          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
356    break;
357  case ISD::SETLT:
358  case ISD::SETOLT:
359    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
360          (VT == MVT::f64) ? RTLIB::OLT_F64 :
361          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
362    break;
363  case ISD::SETLE:
364  case ISD::SETOLE:
365    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
366          (VT == MVT::f64) ? RTLIB::OLE_F64 :
367          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
368    break;
369  case ISD::SETGT:
370  case ISD::SETOGT:
371    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
372          (VT == MVT::f64) ? RTLIB::OGT_F64 :
373          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
374    break;
375  case ISD::SETO:
    // SETO is the inverse of SETUO: emit the unordered call and invert.
376    ShouldInvertCC = true;
377    [[fallthrough]];
378  case ISD::SETUO:
379    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
380          (VT == MVT::f64) ? RTLIB::UO_F64 :
381          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
382    break;
383  case ISD::SETONE:
384    // SETONE = O && UNE
385    ShouldInvertCC = true;
386    [[fallthrough]];
387  case ISD::SETUEQ:
388    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
389          (VT == MVT::f64) ? RTLIB::UO_F64 :
390          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
391    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
392          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
393          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
394    break;
395  default:
396    // Invert CC for unordered comparisons
397    ShouldInvertCC = true;
398    switch (CCCode) {
399    case ISD::SETULT:
400      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
401            (VT == MVT::f64) ? RTLIB::OGE_F64 :
402            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
403      break;
404    case ISD::SETULE:
405      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
406            (VT == MVT::f64) ? RTLIB::OGT_F64 :
407            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
408      break;
409    case ISD::SETUGT:
410      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
411            (VT == MVT::f64) ? RTLIB::OLE_F64 :
412            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
413      break;
414    case ISD::SETUGE:
415      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
416            (VT == MVT::f64) ? RTLIB::OLT_F64 :
417            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
418      break;
419    default: llvm_unreachable("Do not know how to soften this setcc!");
420    }
421  }
422
423  // Use the target specific return value for comparison lib calls.
425  SDValue Ops[2] = {NewLHS, NewRHS};
  // Record the pre-soften operand types so makeLibCall selects the right ABI.
427  EVT OpsVT[2] = { OldLHS.getValueType(),
428                   OldRHS.getValueType() };
429  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
430  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
431  NewLHS = Call.first;
  // The libcall result is compared against zero under the predicate chosen
  // below.
432  NewRHS = DAG.getConstant(0, dl, RetVT);
433
434  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
435  if (LC1Impl == RTLIB::Unsupported) {
437        "no libcall available to soften floating-point compare");
438  }
439
440  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
441  if (ShouldInvertCC) {
442    assert(RetVT.isInteger());
443    CCCode = getSetCCInverse(CCCode, RetVT);
444  }
445
446  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
447    // Update Chain.
448    Chain = Call.second;
449  } else {
    // Two-call expansion: combine the first (unordered) result with a second
    // comparison libcall, ANDing when inverted and ORing otherwise.
450    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
451    if (LC2Impl == RTLIB::Unsupported) {
453          "no libcall available to soften floating-point compare");
454    }
455
456    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
457           "unordered call should be simple boolean");
458
459    EVT SetCCVT =
460        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
462      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
463                           DAG.getValueType(MVT::i1));
464    }
465
466    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
467    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
468    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
469    if (ShouldInvertCC)
470      CCCode = getSetCCInverse(CCCode, RetVT);
471    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // With a chain, both calls must be merged into a single token so later
    // users are ordered after both.
472    if (Chain)
473      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
474                          Call2.second);
475    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
476                         Tmp.getValueType(), Tmp, NewLHS);
477    NewRHS = SDValue();
478  }
479}
480
481/// Return the entry encoding for a jump table in the current function. The
482/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): the actual return statements (and the PIC test between the
// two comments) are elided in this extraction — confirm against the header.
484  // In non-pic modes, just use the address of a block.
487
488  // Otherwise, use a label difference.
490}
491
// Default PIC jump-table relocation base: the jump-table node itself.
// (Signature line is elided in this extraction.)
493                                                 SelectionDAG &DAG) const {
494  return Table;
495}
496
497/// This returns the relocation base for the given PIC jumptable, the same as
498/// getPICJumpTableRelocBase, but as an MCExpr.
499const MCExpr *
// (Continuation line naming the function and the MachineFunction parameter
// is elided in this extraction.)
501                             unsigned JTI,MCContext &Ctx) const{
502  // The normal PIC reloc base is the label at the start of the jump table.
503  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
504}
505
// Expand an indirect jump-table branch into a BRIND node, optionally
// threading jump-table debug info into the chain first.
// (Signature line and the guarding if-condition are elided in this
// extraction.)
507                                               SDValue Addr, int JTI,
508                                               SelectionDAG &DAG) const {
509  SDValue Chain = Value;
510  // Jump table debug info is only needed if CodeView is enabled.
512    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
513  }
514  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
515}
516
// Return true if a constant offset can be folded into the global address:
// requires the global to be DSO-local and the code to be non-PIC.
// (The line naming the function, and the isPositionIndependent() test, are
// elided in this extraction.)
517bool
519  const TargetMachine &TM = getTargetMachine();
520  const GlobalValue *GV = GA->getGlobal();
521
522  // If the address is not even local to this DSO we will have to load it from
523  // a got and then add the offset.
524  if (!TM.shouldAssumeDSOLocal(GV))
525    return false;
526
527  // If the code is position independent we will have to add a base register.
529    return false;
530
531  // Otherwise we can do it.
532  return true;
533}
534
535//===----------------------------------------------------------------------===//
536// Optimization Methods
537//===----------------------------------------------------------------------===//
538
539/// If the specified instruction has a constant integer operand and there are
540/// bits set in that constant that are not demanded, then clear those bits and
541/// return true.
// (Signature line taking Op is elided in this extraction.)
543                                            const APInt &DemandedBits,
544                                            const APInt &DemandedElts,
545                                            TargetLoweringOpt &TLO) const {
546  SDLoc DL(Op);
547  unsigned Opcode = Op.getOpcode();
548
549  // Early-out if we've ended up calling an undemanded node, leave this to
550  // constant folding.
551  if (DemandedBits.isZero() || DemandedElts.isZero())
552    return false;
553
554  // Do target-specific constant optimization.
555  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
556    return TLO.New.getNode();
557
558  // FIXME: ISD::SELECT, ISD::SELECT_CC
559  switch (Opcode) {
560  default:
561    break;
562  case ISD::XOR:
563  case ISD::AND:
564  case ISD::OR: {
    // Only plain (non-opaque) constants can be rewritten.
565    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
566    if (!Op1C || Op1C->isOpaque())
567      return false;
568
569    // If this is a 'not' op, don't touch it because that's a canonical form.
570    const APInt &C = Op1C->getAPIntValue();
571    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
572      return false;
573
    // Constant has bits outside the demanded set: mask them off and rebuild.
574    if (!C.isSubsetOf(DemandedBits)) {
575      EVT VT = Op.getValueType();
576      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
577      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
578                                      Op->getFlags());
579      return TLO.CombineTo(Op, NewOp);
580    }
581
582    break;
583  }
584  }
585
586  return false;
587}
588
// Bits-only convenience overload: demands all elements (the all-ones line of
// the conditional is elided in this extraction) and forwards to the
// per-element variant above.
590                                            const APInt &DemandedBits,
591                                            TargetLoweringOpt &TLO) const {
592  EVT VT = Op.getValueType();
593  APInt DemandedElts = VT.isVector()
595                           : APInt(1, 1);
596  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
597}
598
599/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
600/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
601/// but it could be generalized for targets with other types of implicit
602/// widening casts.
// (Signature line taking Op and BitWidth is elided in this extraction.)
604                                       const APInt &DemandedBits,
605                                       TargetLoweringOpt &TLO) const {
606  assert(Op.getNumOperands() == 2 &&
607         "ShrinkDemandedOp only supports binary operators!");
608  assert(Op.getNode()->getNumValues() == 1 &&
609         "ShrinkDemandedOp only supports nodes with one result!");
610
611  EVT VT = Op.getValueType();
612  SelectionDAG &DAG = TLO.DAG;
613  SDLoc dl(Op);
614
615  // Early return, as this function cannot handle vector types.
616  if (VT.isVector())
617    return false;
618
619  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
620         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
621         "ShrinkDemandedOp only supports operands that have the same size!");
622
623  // Don't do this if the node has another user, which may require the
624  // full value.
625  if (!Op.getNode()->hasOneUse())
626    return false;
627
628  // Search for the smallest integer type with free casts to and from
629  // Op's type. For expedience, just check power-of-2 integer types.
630  unsigned DemandedSize = DemandedBits.getActiveBits();
631  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
632       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
633    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
634    if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
635      // We found a type with free casts.
636
637      // If the operation has the 'disjoint' flag, then the
638      // operands on the new node are also disjoint.
639      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
641      unsigned Opcode = Op.getOpcode();
642      if (Opcode == ISD::PTRADD) {
643        // It isn't a ptradd anymore if it doesn't operate on the entire
644        // pointer.
645        Opcode = ISD::ADD;
646      }
      // Truncate both operands, do the op narrow, then ANY_EXTEND back up.
647      SDValue X = DAG.getNode(
648          Opcode, dl, SmallVT,
649          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
650          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
651      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
652      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
653      return TLO.CombineTo(Op, Z);
654    }
655  }
656  return false;
657}
658
// DAGCombiner-driven entry point: runs SimplifyDemandedBits and, on success,
// requeues the node (the CommitTargetLoweringOpt call on the elided line
// applies TLO's replacement).
660                                          DAGCombinerInfo &DCI) const {
661  SelectionDAG &DAG = DCI.DAG;
662  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
663                        !DCI.isBeforeLegalizeOps());
664  KnownBits Known;
665
666  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
667  if (Simplified) {
668    DCI.AddToWorklist(Op.getNode());
670  }
671  return Simplified;
672}
673
// Same as the overload above but with an explicit demanded-elements mask.
675                                          const APInt &DemandedElts,
676                                          DAGCombinerInfo &DCI) const {
677  SelectionDAG &DAG = DCI.DAG;
678  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
679                        !DCI.isBeforeLegalizeOps());
680  KnownBits Known;
681
682  bool Simplified =
683      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
684  if (Simplified) {
685    DCI.AddToWorklist(Op.getNode());
687  }
688  return Simplified;
689}
690
// KnownBits-returning overload: builds the demanded-elements mask (all lanes
// for fixed vectors, a single broadcast bit otherwise) and forwards on.
692                                          KnownBits &Known,
694                                          unsigned Depth,
695                                          bool AssumeSingleUse) const {
696  EVT VT = Op.getValueType();
697
698  // Since the number of lanes in a scalable vector is unknown at compile time,
699  // we track one bit which is implicitly broadcast to all lanes. This means
700  // that all lanes in a scalable vector are considered demanded.
701  APInt DemandedElts = VT.isFixedLengthVector()
703                           : APInt(1, 1);
704  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
705                              AssumeSingleUse);
}
706}
707
708// TODO: Under what circumstances can we create nodes? Constant folding?
// Given Op and the bits/elements its user demands, return an existing value
// that produces the same demanded bits without creating new nodes — or an
// empty SDValue if none is found. Unlike SimplifyDemandedBits this never
// mutates the DAG, so it is safe on multi-use values.
// (Signature line is elided in this extraction.)
710    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
711    SelectionDAG &DAG, unsigned Depth) const {
712  EVT VT = Op.getValueType();
713
714  // Limit search depth.
716    return SDValue();
717
718  // Ignore UNDEFs.
719  if (Op.isUndef())
720    return SDValue();
721
722  // Not demanding any bits/elts from Op.
723  if (DemandedBits == 0 || DemandedElts == 0)
724    return DAG.getUNDEF(VT);
725
726  bool IsLE = DAG.getDataLayout().isLittleEndian();
727  unsigned NumElts = DemandedElts.getBitWidth();
728  unsigned BitWidth = DemandedBits.getBitWidth();
729  KnownBits LHSKnown, RHSKnown;
730  switch (Op.getOpcode()) {
731  case ISD::BITCAST: {
732    if (VT.isScalableVector())
733      return SDValue();
734
735    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
736    EVT SrcVT = Src.getValueType();
737    EVT DstVT = Op.getValueType();
738    if (SrcVT == DstVT)
739      return Src;
740
741    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
742    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    // Same element width: demanded masks carry over unchanged.
743    if (NumSrcEltBits == NumDstEltBits)
745              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
746        return DAG.getBitcast(DstVT, V);
747
    // Wide dst elements built from several narrow src elements: translate
    // each demanded dst sub-range into the corresponding src element/bits.
748    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
749      unsigned Scale = NumDstEltBits / NumSrcEltBits;
750      unsigned NumSrcElts = SrcVT.getVectorNumElements();
751      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
752      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
753      for (unsigned i = 0; i != Scale; ++i) {
754        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
755        unsigned BitOffset = EltOffset * NumSrcEltBits;
756        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
757        if (!Sub.isZero()) {
758          DemandedSrcBits |= Sub;
759          for (unsigned j = 0; j != NumElts; ++j)
760            if (DemandedElts[j])
761              DemandedSrcElts.setBit((j * Scale) + i);
762        }
763      }
764
766              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
767        return DAG.getBitcast(DstVT, V);
768    }
769
770    // TODO - bigendian once we have test coverage.
    // Narrow dst elements carved out of wide src elements (LE only).
771    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
772      unsigned Scale = NumSrcEltBits / NumDstEltBits;
773      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
774      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
775      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
776      for (unsigned i = 0; i != NumElts; ++i)
777        if (DemandedElts[i]) {
778          unsigned Offset = (i % Scale) * NumDstEltBits;
779          DemandedSrcBits.insertBits(DemandedBits, Offset);
780          DemandedSrcElts.setBit(i / Scale);
781        }
782
784              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
785        return DAG.getBitcast(DstVT, V);
786    }
787
788    break;
789  }
790  case ISD::AND: {
791    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
792    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
793
794    // If all of the demanded bits are known 1 on one side, return the other.
795    // These bits cannot contribute to the result of the 'and' in this
796    // context.
797    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
798      return Op.getOperand(0);
799    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
800      return Op.getOperand(1);
801    break;
802  }
803  case ISD::OR: {
804    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
805    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
806
807    // If all of the demanded bits are known zero on one side, return the
808    // other. These bits cannot contribute to the result of the 'or' in this
809    // context.
810    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
811      return Op.getOperand(0);
812    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
813      return Op.getOperand(1);
814    break;
815  }
816  case ISD::XOR: {
817    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
818    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
819
820    // If all of the demanded bits are known zero on one side, return the
821    // other.
822    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
823      return Op.getOperand(0);
824    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
825      return Op.getOperand(1);
826    break;
827  }
828  case ISD::ADD: {
    // Adding a value whose demanded bits are all known zero is a no-op.
829    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
830    if (RHSKnown.isZero())
831      return Op.getOperand(0);
832
833    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
834    if (LHSKnown.isZero())
835      return Op.getOperand(1);
836    break;
837  }
838  case ISD::SHL: {
839    // If we are only demanding sign bits then we can use the shift source
840    // directly.
841    if (std::optional<unsigned> MaxSA =
842            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
843      SDValue Op0 = Op.getOperand(0);
844      unsigned ShAmt = *MaxSA;
845      unsigned NumSignBits =
846          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
847      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
848      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
849        return Op0;
850    }
851    break;
852  }
853  case ISD::SRL: {
854    // If we are only demanding sign bits then we can use the shift source
855    // directly.
856    if (std::optional<unsigned> MaxSA =
857            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
858      SDValue Op0 = Op.getOperand(0);
859      unsigned ShAmt = *MaxSA;
860      // Must already be signbits in DemandedBits bounds, and can't demand any
861      // shifted in zeroes.
862      if (DemandedBits.countl_zero() >= ShAmt) {
863        unsigned NumSignBits =
864            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
865        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
866          return Op0;
867      }
868    }
869    break;
870  }
871  case ISD::SETCC: {
872    SDValue Op0 = Op.getOperand(0);
873    SDValue Op1 = Op.getOperand(1);
874    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
875    // If (1) we only need the sign-bit, (2) the setcc operands are the same
876    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
877    // -1, we may be able to bypass the setcc.
878    if (DemandedBits.isSignMask() &&
882      // If we're testing X < 0, then this compare isn't needed - just use X!
883      // FIXME: We're limiting to integer types here, but this should also work
884      // if we don't care about FP signed-zero. The use of SETLT with FP means
885      // that we don't care about NaNs.
886      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
888        return Op0;
889    }
890    break;
891  }
893    // If none of the extended bits are demanded, eliminate the sextinreg.
894    SDValue Op0 = Op.getOperand(0);
895    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
896    unsigned ExBits = ExVT.getScalarSizeInBits();
897    if (DemandedBits.getActiveBits() <= ExBits &&
899      return Op0;
900    // If the input is already sign extended, just drop the extension.
901    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
902    if (NumSignBits >= (BitWidth - ExBits + 1))
903      return Op0;
904    break;
905  }
909    if (VT.isScalableVector())
910      return SDValue();
911
912    // If we only want the lowest element and none of extended bits, then we can
913    // return the bitcasted source vector.
914    SDValue Src = Op.getOperand(0);
915    EVT SrcVT = Src.getValueType();
916    EVT DstVT = Op.getValueType();
917    if (IsLE && DemandedElts == 1 &&
918        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
919        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
920      return DAG.getBitcast(DstVT, Src);
921    }
922    break;
923  }
925    if (VT.isScalableVector())
926      return SDValue();
927
928    // If we don't demand the inserted element, return the base vector.
929    SDValue Vec = Op.getOperand(0);
930    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
931    EVT VecVT = Vec.getValueType();
932    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
933        !DemandedElts[CIdx->getZExtValue()])
934      return Vec;
935    break;
936  }
938    if (VT.isScalableVector())
939      return SDValue();
940
941    SDValue Vec = Op.getOperand(0);
942    SDValue Sub = Op.getOperand(1);
943    uint64_t Idx = Op.getConstantOperandVal(2);
944    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
945    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
946    // If we don't demand the inserted subvector, return the base vector.
947    if (DemandedSubElts == 0)
948      return Vec;
949    break;
950  }
951  case ISD::VECTOR_SHUFFLE: {
953    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
954
955    // If all the demanded elts are from one operand and are inline,
956    // then we can use the operand directly.
957    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
958    for (unsigned i = 0; i != NumElts; ++i) {
959      int M = ShuffleMask[i];
960      if (M < 0 || !DemandedElts[i])
961        continue;
962      AllUndef = false;
963      IdentityLHS &= (M == (int)i);
964      IdentityRHS &= ((M - NumElts) == i);
965    }
966
967    if (AllUndef)
968      return DAG.getUNDEF(Op.getValueType());
969    if (IdentityLHS)
970      return Op.getOperand(0);
971    if (IdentityRHS)
972      return Op.getOperand(1);
973    break;
974  }
975  default:
976    // TODO: Probably okay to remove after audit; here to reduce change size
977    // in initial enablement patch for scalable vectors
978    if (VT.isScalableVector())
979      return SDValue();
980
    // Give targets a chance to handle their own opcodes.
981    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
983              Op, DemandedBits, DemandedElts, DAG, Depth))
984        return V;
985    break;
986  }
987  return SDValue();
988}
989
// Bits-only convenience overload: derives the demanded-elements mask and
// forwards to the full variant. (Signature lines are elided in this
// extraction.)
992    unsigned Depth) const {
993  EVT VT = Op.getValueType();
994  // Since the number of lanes in a scalable vector is unknown at compile time,
995  // we track one bit which is implicitly broadcast to all lanes. This means
996  // that all lanes in a scalable vector are considered demanded.
997  APInt DemandedElts = VT.isFixedLengthVector()
999                           : APInt(1, 1);
1000  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1001                                         Depth);
1002}
1003
1005 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1006 unsigned Depth) const {
1007 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1008 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1009 Depth);
1010}
1011
1012// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1013// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1016 const TargetLowering &TLI,
1017 const APInt &DemandedBits,
1018 const APInt &DemandedElts, unsigned Depth) {
1019 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1020 "SRL or SRA node is required here!");
1021 // Is the right shift using an immediate value of 1?
1022 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1023 if (!N1C || !N1C->isOne())
1024 return SDValue();
1025
1026 // We are looking for an avgfloor
1027 // add(ext, ext)
1028 // or one of these as a avgceil
1029 // add(add(ext, ext), 1)
1030 // add(add(ext, 1), ext)
1031 // add(ext, add(ext, 1))
1032 SDValue Add = Op.getOperand(0);
1033 if (Add.getOpcode() != ISD::ADD)
1034 return SDValue();
1035
1036 SDValue ExtOpA = Add.getOperand(0);
1037 SDValue ExtOpB = Add.getOperand(1);
1038 SDValue Add2;
1039 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1040 ConstantSDNode *ConstOp;
1041 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1042 ConstOp->isOne()) {
1043 ExtOpA = Op1;
1044 ExtOpB = Op3;
1045 Add2 = A;
1046 return true;
1047 }
1048 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1049 ConstOp->isOne()) {
1050 ExtOpA = Op1;
1051 ExtOpB = Op2;
1052 Add2 = A;
1053 return true;
1054 }
1055 return false;
1056 };
1057 bool IsCeil =
1058 (ExtOpA.getOpcode() == ISD::ADD &&
1059 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1060 (ExtOpB.getOpcode() == ISD::ADD &&
1061 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1062
1063 // If the shift is signed (sra):
1064 // - Needs >= 2 sign bit for both operands.
1065 // - Needs >= 2 zero bits.
1066 // If the shift is unsigned (srl):
1067 // - Needs >= 1 zero bit for both operands.
1068 // - Needs 1 demanded bit zero and >= 2 sign bits.
1069 SelectionDAG &DAG = TLO.DAG;
1070 unsigned ShiftOpc = Op.getOpcode();
1071 bool IsSigned = false;
1072 unsigned KnownBits;
1073 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1074 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1075 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1076 unsigned NumZeroA =
1077 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1078 unsigned NumZeroB =
1079 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1080 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1081
1082 switch (ShiftOpc) {
1083 default:
1084 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1085 case ISD::SRA: {
1086 if (NumZero >= 2 && NumSigned < NumZero) {
1087 IsSigned = false;
1088 KnownBits = NumZero;
1089 break;
1090 }
1091 if (NumSigned >= 1) {
1092 IsSigned = true;
1093 KnownBits = NumSigned;
1094 break;
1095 }
1096 return SDValue();
1097 }
1098 case ISD::SRL: {
1099 if (NumZero >= 1 && NumSigned < NumZero) {
1100 IsSigned = false;
1101 KnownBits = NumZero;
1102 break;
1103 }
1104 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1105 IsSigned = true;
1106 KnownBits = NumSigned;
1107 break;
1108 }
1109 return SDValue();
1110 }
1111 }
1112
1113 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1114 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1115
1116 // Find the smallest power-2 type that is legal for this vector size and
1117 // operation, given the original type size and the number of known sign/zero
1118 // bits.
1119 EVT VT = Op.getValueType();
1120 unsigned MinWidth =
1121 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1122 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1124 return SDValue();
1125 if (VT.isVector())
1126 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1127 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1128 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1129 // larger type size to do the transform.
1130 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1131 return SDValue();
1132 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1133 Add.getOperand(1)) &&
1134 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1135 Add2.getOperand(1))))
1136 NVT = VT;
1137 else
1138 return SDValue();
1139 }
1140
1141 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1142 // this is likely to stop other folds (reassociation, value tracking etc.)
1143 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1144 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1145 return SDValue();
1146
1147 SDLoc DL(Op);
1148 SDValue ResultAVG =
1149 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1150 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1151 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1152}
1153
1154/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1155/// result of Op are ever used downstream. If we can use this information to
1156/// simplify Op, create a new simplified DAG node and return true, returning the
1157/// original and new nodes in Old and New. Otherwise, analyze the expression and
1158/// return a mask of Known bits for the expression (used to simplify the
1159/// caller). The Known bits may only be accurate for those bits in the
1160/// OriginalDemandedBits and OriginalDemandedElts.
1162 SDValue Op, const APInt &OriginalDemandedBits,
1163 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1164 unsigned Depth, bool AssumeSingleUse) const {
1165 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1166 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1167 "Mask size mismatches value type size!");
1168
1169 // Don't know anything.
1170 Known = KnownBits(BitWidth);
1171
1172 EVT VT = Op.getValueType();
1173 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1174 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1175 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1176 "Unexpected vector size");
1177
1178 APInt DemandedBits = OriginalDemandedBits;
1179 APInt DemandedElts = OriginalDemandedElts;
1180 SDLoc dl(Op);
1181
1182 // Undef operand.
1183 if (Op.isUndef())
1184 return false;
1185
1186 // We can't simplify target constants.
1187 if (Op.getOpcode() == ISD::TargetConstant)
1188 return false;
1189
1190 if (Op.getOpcode() == ISD::Constant) {
1191 // We know all of the bits for a constant!
1192 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1193 return false;
1194 }
1195
1196 if (Op.getOpcode() == ISD::ConstantFP) {
1197 // We know all of the bits for a floating point constant!
1199 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1200 return false;
1201 }
1202
1203 // Other users may use these bits.
1204 bool HasMultiUse = false;
1205 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1207 // Limit search depth.
1208 return false;
1209 }
1210 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1212 DemandedElts = APInt::getAllOnes(NumElts);
1213 HasMultiUse = true;
1214 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1215 // Not demanding any bits/elts from Op.
1216 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1217 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1218 // Limit search depth.
1219 return false;
1220 }
1221
1222 KnownBits Known2;
1223 switch (Op.getOpcode()) {
1224 case ISD::SCALAR_TO_VECTOR: {
1225 if (VT.isScalableVector())
1226 return false;
1227 if (!DemandedElts[0])
1228 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1229
1230 KnownBits SrcKnown;
1231 SDValue Src = Op.getOperand(0);
1232 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1233 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1234 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1235 return true;
1236
1237 // Upper elements are undef, so only get the knownbits if we just demand
1238 // the bottom element.
1239 if (DemandedElts == 1)
1240 Known = SrcKnown.anyextOrTrunc(BitWidth);
1241 break;
1242 }
1243 case ISD::BUILD_VECTOR:
1244 // Collect the known bits that are shared by every demanded element.
1245 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1246 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1247 return false; // Don't fall through, will infinitely loop.
1248 case ISD::SPLAT_VECTOR: {
1249 SDValue Scl = Op.getOperand(0);
1250 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1251 KnownBits KnownScl;
1252 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1253 return true;
1254
1255 // Implicitly truncate the bits to match the official semantics of
1256 // SPLAT_VECTOR.
1257 Known = KnownScl.trunc(BitWidth);
1258 break;
1259 }
1260 case ISD::LOAD: {
1261 auto *LD = cast<LoadSDNode>(Op);
1262 if (getTargetConstantFromLoad(LD)) {
1263 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1264 return false; // Don't fall through, will infinitely loop.
1265 }
1266 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1267 // If this is a ZEXTLoad and we are looking at the loaded value.
1268 EVT MemVT = LD->getMemoryVT();
1269 unsigned MemBits = MemVT.getScalarSizeInBits();
1270 Known.Zero.setBitsFrom(MemBits);
1271 return false; // Don't fall through, will infinitely loop.
1272 }
1273 break;
1274 }
1276 if (VT.isScalableVector())
1277 return false;
1278 SDValue Vec = Op.getOperand(0);
1279 SDValue Scl = Op.getOperand(1);
1280 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1281 EVT VecVT = Vec.getValueType();
1282
1283 // If index isn't constant, assume we need all vector elements AND the
1284 // inserted element.
1285 APInt DemandedVecElts(DemandedElts);
1286 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1287 unsigned Idx = CIdx->getZExtValue();
1288 DemandedVecElts.clearBit(Idx);
1289
1290 // Inserted element is not required.
1291 if (!DemandedElts[Idx])
1292 return TLO.CombineTo(Op, Vec);
1293 }
1294
1295 KnownBits KnownScl;
1296 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1297 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1298 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1299 return true;
1300
1301 Known = KnownScl.anyextOrTrunc(BitWidth);
1302
1303 KnownBits KnownVec;
1304 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1305 Depth + 1))
1306 return true;
1307
1308 if (!!DemandedVecElts)
1309 Known = Known.intersectWith(KnownVec);
1310
1311 return false;
1312 }
1313 case ISD::INSERT_SUBVECTOR: {
1314 if (VT.isScalableVector())
1315 return false;
1316 // Demand any elements from the subvector and the remainder from the src its
1317 // inserted into.
1318 SDValue Src = Op.getOperand(0);
1319 SDValue Sub = Op.getOperand(1);
1320 uint64_t Idx = Op.getConstantOperandVal(2);
1321 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1322 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1323 APInt DemandedSrcElts = DemandedElts;
1324 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1325
1326 KnownBits KnownSub, KnownSrc;
1327 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1328 Depth + 1))
1329 return true;
1330 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1331 Depth + 1))
1332 return true;
1333
1334 Known.Zero.setAllBits();
1335 Known.One.setAllBits();
1336 if (!!DemandedSubElts)
1337 Known = Known.intersectWith(KnownSub);
1338 if (!!DemandedSrcElts)
1339 Known = Known.intersectWith(KnownSrc);
1340
1341 // Attempt to avoid multi-use src if we don't need anything from it.
1342 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1343 !DemandedSrcElts.isAllOnes()) {
1345 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1347 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1348 if (NewSub || NewSrc) {
1349 NewSub = NewSub ? NewSub : Sub;
1350 NewSrc = NewSrc ? NewSrc : Src;
1351 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1352 Op.getOperand(2));
1353 return TLO.CombineTo(Op, NewOp);
1354 }
1355 }
1356 break;
1357 }
1359 if (VT.isScalableVector())
1360 return false;
1361 // Offset the demanded elts by the subvector index.
1362 SDValue Src = Op.getOperand(0);
1363 if (Src.getValueType().isScalableVector())
1364 break;
1365 uint64_t Idx = Op.getConstantOperandVal(1);
1366 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1367 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1368
1369 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1370 Depth + 1))
1371 return true;
1372
1373 // Attempt to avoid multi-use src if we don't need anything from it.
1374 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1376 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1377 if (DemandedSrc) {
1378 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1379 Op.getOperand(1));
1380 return TLO.CombineTo(Op, NewOp);
1381 }
1382 }
1383 break;
1384 }
1385 case ISD::CONCAT_VECTORS: {
1386 if (VT.isScalableVector())
1387 return false;
1388 Known.Zero.setAllBits();
1389 Known.One.setAllBits();
1390 EVT SubVT = Op.getOperand(0).getValueType();
1391 unsigned NumSubVecs = Op.getNumOperands();
1392 unsigned NumSubElts = SubVT.getVectorNumElements();
1393 for (unsigned i = 0; i != NumSubVecs; ++i) {
1394 APInt DemandedSubElts =
1395 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1396 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1397 Known2, TLO, Depth + 1))
1398 return true;
1399 // Known bits are shared by every demanded subvector element.
1400 if (!!DemandedSubElts)
1401 Known = Known.intersectWith(Known2);
1402 }
1403 break;
1404 }
1405 case ISD::VECTOR_SHUFFLE: {
1406 assert(!VT.isScalableVector());
1407 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1408
1409 // Collect demanded elements from shuffle operands..
1410 APInt DemandedLHS, DemandedRHS;
1411 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1412 DemandedRHS))
1413 break;
1414
1415 if (!!DemandedLHS || !!DemandedRHS) {
1416 SDValue Op0 = Op.getOperand(0);
1417 SDValue Op1 = Op.getOperand(1);
1418
1419 Known.Zero.setAllBits();
1420 Known.One.setAllBits();
1421 if (!!DemandedLHS) {
1422 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1423 Depth + 1))
1424 return true;
1425 Known = Known.intersectWith(Known2);
1426 }
1427 if (!!DemandedRHS) {
1428 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1429 Depth + 1))
1430 return true;
1431 Known = Known.intersectWith(Known2);
1432 }
1433
1434 // Attempt to avoid multi-use ops if we don't need anything from them.
1436 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1438 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1439 if (DemandedOp0 || DemandedOp1) {
1440 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1441 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1442 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1443 return TLO.CombineTo(Op, NewOp);
1444 }
1445 }
1446 break;
1447 }
1448 case ISD::AND: {
1449 SDValue Op0 = Op.getOperand(0);
1450 SDValue Op1 = Op.getOperand(1);
1451
1452 // If the RHS is a constant, check to see if the LHS would be zero without
1453 // using the bits from the RHS. Below, we use knowledge about the RHS to
1454 // simplify the LHS, here we're using information from the LHS to simplify
1455 // the RHS.
1456 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1457 // Do not increment Depth here; that can cause an infinite loop.
1458 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1459 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1460 if ((LHSKnown.Zero & DemandedBits) ==
1461 (~RHSC->getAPIntValue() & DemandedBits))
1462 return TLO.CombineTo(Op, Op0);
1463
1464 // If any of the set bits in the RHS are known zero on the LHS, shrink
1465 // the constant.
1466 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1467 DemandedElts, TLO))
1468 return true;
1469
1470 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1471 // constant, but if this 'and' is only clearing bits that were just set by
1472 // the xor, then this 'and' can be eliminated by shrinking the mask of
1473 // the xor. For example, for a 32-bit X:
1474 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1475 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1476 LHSKnown.One == ~RHSC->getAPIntValue()) {
1477 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1478 return TLO.CombineTo(Op, Xor);
1479 }
1480 }
1481
1482 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1483 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1484 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1485 (Op0.getOperand(0).isUndef() ||
1487 Op0->hasOneUse()) {
1488 unsigned NumSubElts =
1490 unsigned SubIdx = Op0.getConstantOperandVal(2);
1491 APInt DemandedSub =
1492 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1493 KnownBits KnownSubMask =
1494 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1495 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1496 SDValue NewAnd =
1497 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1498 SDValue NewInsert =
1499 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1500 Op0.getOperand(1), Op0.getOperand(2));
1501 return TLO.CombineTo(Op, NewInsert);
1502 }
1503 }
1504
1505 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1506 Depth + 1))
1507 return true;
1508 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1509 Known2, TLO, Depth + 1))
1510 return true;
1511
1512 // If all of the demanded bits are known one on one side, return the other.
1513 // These bits cannot contribute to the result of the 'and'.
1514 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1515 return TLO.CombineTo(Op, Op0);
1516 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1517 return TLO.CombineTo(Op, Op1);
1518 // If all of the demanded bits in the inputs are known zeros, return zero.
1519 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1520 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1521 // If the RHS is a constant, see if we can simplify it.
1522 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1523 TLO))
1524 return true;
1525 // If the operation can be done in a smaller type, do so.
1527 return true;
1528
1529 // Attempt to avoid multi-use ops if we don't need anything from them.
1530 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1532 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1534 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1535 if (DemandedOp0 || DemandedOp1) {
1536 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1537 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1538 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1539 return TLO.CombineTo(Op, NewOp);
1540 }
1541 }
1542
1543 Known &= Known2;
1544 break;
1545 }
1546 case ISD::OR: {
1547 SDValue Op0 = Op.getOperand(0);
1548 SDValue Op1 = Op.getOperand(1);
1549 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1550 Depth + 1)) {
1551 Op->dropFlags(SDNodeFlags::Disjoint);
1552 return true;
1553 }
1554
1555 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1556 Known2, TLO, Depth + 1)) {
1557 Op->dropFlags(SDNodeFlags::Disjoint);
1558 return true;
1559 }
1560
1561 // If all of the demanded bits are known zero on one side, return the other.
1562 // These bits cannot contribute to the result of the 'or'.
1563 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1564 return TLO.CombineTo(Op, Op0);
1565 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1566 return TLO.CombineTo(Op, Op1);
1567 // If the RHS is a constant, see if we can simplify it.
1568 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1569 return true;
1570 // If the operation can be done in a smaller type, do so.
1572 return true;
1573
1574 // Attempt to avoid multi-use ops if we don't need anything from them.
1575 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1577 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1579 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1580 if (DemandedOp0 || DemandedOp1) {
1581 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1582 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1583 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1584 return TLO.CombineTo(Op, NewOp);
1585 }
1586 }
1587
1588 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1589 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1590 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1591 Op0->hasOneUse() && Op1->hasOneUse()) {
1592 // Attempt to match all commutations - m_c_Or would've been useful!
1593 for (int I = 0; I != 2; ++I) {
1594 SDValue X = Op.getOperand(I).getOperand(0);
1595 SDValue C1 = Op.getOperand(I).getOperand(1);
1596 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1597 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1598 if (Alt.getOpcode() == ISD::OR) {
1599 for (int J = 0; J != 2; ++J) {
1600 if (X == Alt.getOperand(J)) {
1601 SDValue Y = Alt.getOperand(1 - J);
1602 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1603 {C1, C2})) {
1604 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1605 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1606 return TLO.CombineTo(
1607 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1608 }
1609 }
1610 }
1611 }
1612 }
1613 }
1614
1615 Known |= Known2;
1616 break;
1617 }
1618 case ISD::XOR: {
1619 SDValue Op0 = Op.getOperand(0);
1620 SDValue Op1 = Op.getOperand(1);
1621
1622 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1623 Depth + 1))
1624 return true;
1625 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1626 Depth + 1))
1627 return true;
1628
1629 // If all of the demanded bits are known zero on one side, return the other.
1630 // These bits cannot contribute to the result of the 'xor'.
1631 if (DemandedBits.isSubsetOf(Known.Zero))
1632 return TLO.CombineTo(Op, Op0);
1633 if (DemandedBits.isSubsetOf(Known2.Zero))
1634 return TLO.CombineTo(Op, Op1);
1635 // If the operation can be done in a smaller type, do so.
1637 return true;
1638
1639 // If all of the unknown bits are known to be zero on one side or the other
1640 // turn this into an *inclusive* or.
1641 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1642 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1643 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1644
1645 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1646 if (C) {
1647 // If one side is a constant, and all of the set bits in the constant are
1648 // also known set on the other side, turn this into an AND, as we know
1649 // the bits will be cleared.
1650 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1651 // NB: it is okay if more bits are known than are requested
1652 if (C->getAPIntValue() == Known2.One) {
1653 SDValue ANDC =
1654 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1655 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1656 }
1657
1658 // If the RHS is a constant, see if we can change it. Don't alter a -1
1659 // constant because that's a 'not' op, and that is better for combining
1660 // and codegen.
1661 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1662 // We're flipping all demanded bits. Flip the undemanded bits too.
1663 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1664 return TLO.CombineTo(Op, New);
1665 }
1666
1667 unsigned Op0Opcode = Op0.getOpcode();
1668 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1669 if (ConstantSDNode *ShiftC =
1670 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1671 // Don't crash on an oversized shift. We can not guarantee that a
1672 // bogus shift has been simplified to undef.
1673 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1674 uint64_t ShiftAmt = ShiftC->getZExtValue();
1676 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1677 : Ones.lshr(ShiftAmt);
1678 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1680 // If the xor constant is a demanded mask, do a 'not' before the
1681 // shift:
1682 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1683 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1684 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1685 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1686 Op0.getOperand(1)));
1687 }
1688 }
1689 }
1690 }
1691 }
1692
1693 // If we can't turn this into a 'not', try to shrink the constant.
1694 if (!C || !C->isAllOnes())
1695 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1696 return true;
1697
1698 // Attempt to avoid multi-use ops if we don't need anything from them.
1699 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1701 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1703 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1704 if (DemandedOp0 || DemandedOp1) {
1705 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1706 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1707 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1708 return TLO.CombineTo(Op, NewOp);
1709 }
1710 }
1711
1712 Known ^= Known2;
1713 break;
1714 }
1715 case ISD::SELECT:
1716 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1717 Known, TLO, Depth + 1))
1718 return true;
1719 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1720 Known2, TLO, Depth + 1))
1721 return true;
1722
1723 // If the operands are constants, see if we can simplify them.
1724 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1725 return true;
1726
1727 // Only known if known in both the LHS and RHS.
1728 Known = Known.intersectWith(Known2);
1729 break;
1730 case ISD::VSELECT:
1731 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1732 Known, TLO, Depth + 1))
1733 return true;
1734 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1735 Known2, TLO, Depth + 1))
1736 return true;
1737
1738 // Only known if known in both the LHS and RHS.
1739 Known = Known.intersectWith(Known2);
1740 break;
1741 case ISD::SELECT_CC:
1742 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1743 Known, TLO, Depth + 1))
1744 return true;
1745 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1746 Known2, TLO, Depth + 1))
1747 return true;
1748
1749 // If the operands are constants, see if we can simplify them.
1750 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1751 return true;
1752
1753 // Only known if known in both the LHS and RHS.
1754 Known = Known.intersectWith(Known2);
1755 break;
1756 case ISD::SETCC: {
1757 SDValue Op0 = Op.getOperand(0);
1758 SDValue Op1 = Op.getOperand(1);
1759 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1760 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1761 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1762 // -1, we may be able to bypass the setcc.
1763 if (DemandedBits.isSignMask() &&
1767 // If we're testing X < 0, then this compare isn't needed - just use X!
1768 // FIXME: We're limiting to integer types here, but this should also work
1769 // if we don't care about FP signed-zero. The use of SETLT with FP means
1770 // that we don't care about NaNs.
1771 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1773 return TLO.CombineTo(Op, Op0);
1774
1775 // TODO: Should we check for other forms of sign-bit comparisons?
1776 // Examples: X <= -1, X >= 0
1777 }
1778 if (getBooleanContents(Op0.getValueType()) ==
1780 BitWidth > 1)
1781 Known.Zero.setBitsFrom(1);
1782 break;
1783 }
1784 case ISD::SHL: {
1785 SDValue Op0 = Op.getOperand(0);
1786 SDValue Op1 = Op.getOperand(1);
1787 EVT ShiftVT = Op1.getValueType();
1788
1789 if (std::optional<unsigned> KnownSA =
1790 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1791 unsigned ShAmt = *KnownSA;
1792 if (ShAmt == 0)
1793 return TLO.CombineTo(Op, Op0);
1794
1795 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1796 // single shift. We can do this if the bottom bits (which are shifted
1797 // out) are never demanded.
1798 // TODO - support non-uniform vector amounts.
1799 if (Op0.getOpcode() == ISD::SRL) {
1800 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1801 if (std::optional<unsigned> InnerSA =
1802 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1803 unsigned C1 = *InnerSA;
1804 unsigned Opc = ISD::SHL;
1805 int Diff = ShAmt - C1;
1806 if (Diff < 0) {
1807 Diff = -Diff;
1808 Opc = ISD::SRL;
1809 }
1810 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1811 return TLO.CombineTo(
1812 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1813 }
1814 }
1815 }
1816
1817 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1818 // are not demanded. This will likely allow the anyext to be folded away.
1819 // TODO - support non-uniform vector amounts.
1820 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1821 SDValue InnerOp = Op0.getOperand(0);
1822 EVT InnerVT = InnerOp.getValueType();
1823 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1824 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1825 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1826 SDValue NarrowShl = TLO.DAG.getNode(
1827 ISD::SHL, dl, InnerVT, InnerOp,
1828 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1829 return TLO.CombineTo(
1830 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1831 }
1832
1833 // Repeat the SHL optimization above in cases where an extension
1834 // intervenes: (shl (anyext (shr x, c1)), c2) to
1835 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1836 // aren't demanded (as above) and that the shifted upper c1 bits of
1837 // x aren't demanded.
1838 // TODO - support non-uniform vector amounts.
1839 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1840 InnerOp.hasOneUse()) {
1841 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1842 InnerOp, DemandedElts, Depth + 2)) {
1843 unsigned InnerShAmt = *SA2;
1844 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1845 DemandedBits.getActiveBits() <=
1846 (InnerBits - InnerShAmt + ShAmt) &&
1847 DemandedBits.countr_zero() >= ShAmt) {
1848 SDValue NewSA =
1849 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1850 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1851 InnerOp.getOperand(0));
1852 return TLO.CombineTo(
1853 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1854 }
1855 }
1856 }
1857 }
1858
1859 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1860 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1861 Depth + 1)) {
1862 // Disable the nsw and nuw flags. We can no longer guarantee that we
1863 // won't wrap after simplification.
1864 Op->dropFlags(SDNodeFlags::NoWrap);
1865 return true;
1866 }
1867 Known <<= ShAmt;
1868 // low bits known zero.
1869 Known.Zero.setLowBits(ShAmt);
1870
1871 // Attempt to avoid multi-use ops if we don't need anything from them.
1872 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1874 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1875 if (DemandedOp0) {
1876 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1877 return TLO.CombineTo(Op, NewOp);
1878 }
1879 }
1880
1881 // TODO: Can we merge this fold with the one below?
1882 // Try shrinking the operation as long as the shift amount will still be
1883 // in range.
1884 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1885 Op.getNode()->hasOneUse()) {
1886 // Search for the smallest integer type with free casts to and from
1887 // Op's type. For expedience, just check power-of-2 integer types.
1888 unsigned DemandedSize = DemandedBits.getActiveBits();
1889 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1890 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1891 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1892 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1893 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1894 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1895 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1896 assert(DemandedSize <= SmallVTBits &&
1897 "Narrowed below demanded bits?");
1898 // We found a type with free casts.
1899 SDValue NarrowShl = TLO.DAG.getNode(
1900 ISD::SHL, dl, SmallVT,
1901 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1902 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1903 return TLO.CombineTo(
1904 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1905 }
1906 }
1907 }
1908
1909 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1910 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1911 // Only do this if we demand the upper half so the knownbits are correct.
1912 unsigned HalfWidth = BitWidth / 2;
1913 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1914 DemandedBits.countLeadingOnes() >= HalfWidth) {
1915 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1916 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1917 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1918 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1919 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1920 // If we're demanding the upper bits at all, we must ensure
1921 // that the upper bits of the shift result are known to be zero,
1922 // which is equivalent to the narrow shift being NUW.
1923 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1924 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1925 SDNodeFlags Flags;
1926 Flags.setNoSignedWrap(IsNSW);
1927 Flags.setNoUnsignedWrap(IsNUW);
1928 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1929 SDValue NewShiftAmt =
1930 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1931 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1932 NewShiftAmt, Flags);
1933 SDValue NewExt =
1934 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1935 return TLO.CombineTo(Op, NewExt);
1936 }
1937 }
1938 }
1939 } else {
1940 // This is a variable shift, so we can't shift the demand mask by a known
1941 // amount. But if we are not demanding high bits, then we are not
1942 // demanding those bits from the pre-shifted operand either.
1943 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1944 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1945 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1946 Depth + 1)) {
1947 // Disable the nsw and nuw flags. We can no longer guarantee that we
1948 // won't wrap after simplification.
1949 Op->dropFlags(SDNodeFlags::NoWrap);
1950 return true;
1951 }
1952 Known.resetAll();
1953 }
1954 }
1955
1956 // If we are only demanding sign bits then we can use the shift source
1957 // directly.
1958 if (std::optional<unsigned> MaxSA =
1959 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1960 unsigned ShAmt = *MaxSA;
1961 unsigned NumSignBits =
1962 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1963 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1964 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1965 return TLO.CombineTo(Op, Op0);
1966 }
1967 break;
1968 }
1969 case ISD::SRL: {
1970 SDValue Op0 = Op.getOperand(0);
1971 SDValue Op1 = Op.getOperand(1);
1972 EVT ShiftVT = Op1.getValueType();
1973
1974 if (std::optional<unsigned> KnownSA =
1975 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1976 unsigned ShAmt = *KnownSA;
1977 if (ShAmt == 0)
1978 return TLO.CombineTo(Op, Op0);
1979
1980 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1981 // single shift. We can do this if the top bits (which are shifted out)
1982 // are never demanded.
1983 // TODO - support non-uniform vector amounts.
1984 if (Op0.getOpcode() == ISD::SHL) {
1985 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1986 if (std::optional<unsigned> InnerSA =
1987 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1988 unsigned C1 = *InnerSA;
1989 unsigned Opc = ISD::SRL;
1990 int Diff = ShAmt - C1;
1991 if (Diff < 0) {
1992 Diff = -Diff;
1993 Opc = ISD::SHL;
1994 }
1995 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1996 return TLO.CombineTo(
1997 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1998 }
1999 }
2000 }
2001
2002 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2003 // single sra. We can do this if the top bits are never demanded.
2004 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2005 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2006 if (std::optional<unsigned> InnerSA =
2007 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2008 unsigned C1 = *InnerSA;
2009 // Clamp the combined shift amount if it exceeds the bit width.
2010 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2011 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2012 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2013 Op0.getOperand(0), NewSA));
2014 }
2015 }
2016 }
2017
2018 APInt InDemandedMask = (DemandedBits << ShAmt);
2019
2020 // If the shift is exact, then it does demand the low bits (and knows that
2021 // they are zero).
2022 if (Op->getFlags().hasExact())
2023 InDemandedMask.setLowBits(ShAmt);
2024
2025 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2026 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2027 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2029 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2030 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2031 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2032 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2033 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2034 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2035 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2036 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2037 SDValue NewShiftAmt =
2038 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2039 SDValue NewShift =
2040 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2041 return TLO.CombineTo(
2042 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2043 }
2044 }
2045
2046 // Compute the new bits that are at the top now.
2047 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2048 Depth + 1))
2049 return true;
2050 Known >>= ShAmt;
2051 // High bits known zero.
2052 Known.Zero.setHighBits(ShAmt);
2053
2054 // Attempt to avoid multi-use ops if we don't need anything from them.
2055 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2057 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2058 if (DemandedOp0) {
2059 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2060 return TLO.CombineTo(Op, NewOp);
2061 }
2062 }
2063 } else {
2064 // Use generic knownbits computation as it has support for non-uniform
2065 // shift amounts.
2066 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2067 }
2068
2069 // If we are only demanding sign bits then we can use the shift source
2070 // directly.
2071 if (std::optional<unsigned> MaxSA =
2072 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2073 unsigned ShAmt = *MaxSA;
2074 // Must already be signbits in DemandedBits bounds, and can't demand any
2075 // shifted in zeroes.
2076 if (DemandedBits.countl_zero() >= ShAmt) {
2077 unsigned NumSignBits =
2078 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2079 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2080 return TLO.CombineTo(Op, Op0);
2081 }
2082 }
2083
2084 // Try to match AVG patterns (after shift simplification).
2085 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2086 DemandedElts, Depth + 1))
2087 return TLO.CombineTo(Op, AVG);
2088
2089 break;
2090 }
2091 case ISD::SRA: {
2092 SDValue Op0 = Op.getOperand(0);
2093 SDValue Op1 = Op.getOperand(1);
2094 EVT ShiftVT = Op1.getValueType();
2095
2096 // If we only want bits that already match the signbit then we don't need
2097 // to shift.
2098 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2099 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2100 NumHiDemandedBits)
2101 return TLO.CombineTo(Op, Op0);
2102
2103 // If this is an arithmetic shift right and only the low-bit is set, we can
2104 // always convert this into a logical shr, even if the shift amount is
2105 // variable. The low bit of the shift cannot be an input sign bit unless
2106 // the shift amount is >= the size of the datatype, which is undefined.
2107 if (DemandedBits.isOne())
2108 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2109
2110 if (std::optional<unsigned> KnownSA =
2111 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2112 unsigned ShAmt = *KnownSA;
2113 if (ShAmt == 0)
2114 return TLO.CombineTo(Op, Op0);
2115
2116 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2117 // supports sext_inreg.
2118 if (Op0.getOpcode() == ISD::SHL) {
2119 if (std::optional<unsigned> InnerSA =
2120 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2121 unsigned LowBits = BitWidth - ShAmt;
2122 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2123 if (VT.isVector())
2124 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2126
2127 if (*InnerSA == ShAmt) {
2128 if (!TLO.LegalOperations() ||
2130 return TLO.CombineTo(
2131 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2132 Op0.getOperand(0),
2133 TLO.DAG.getValueType(ExtVT)));
2134
2135 // Even if we can't convert to sext_inreg, we might be able to
2136 // remove this shift pair if the input is already sign extended.
2137 unsigned NumSignBits =
2138 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2139 if (NumSignBits > ShAmt)
2140 return TLO.CombineTo(Op, Op0.getOperand(0));
2141 }
2142 }
2143 }
2144
2145 APInt InDemandedMask = (DemandedBits << ShAmt);
2146
2147 // If the shift is exact, then it does demand the low bits (and knows that
2148 // they are zero).
2149 if (Op->getFlags().hasExact())
2150 InDemandedMask.setLowBits(ShAmt);
2151
2152 // If any of the demanded bits are produced by the sign extension, we also
2153 // demand the input sign bit.
2154 if (DemandedBits.countl_zero() < ShAmt)
2155 InDemandedMask.setSignBit();
2156
2157 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2158 Depth + 1))
2159 return true;
2160 Known >>= ShAmt;
2161
2162 // If the input sign bit is known to be zero, or if none of the top bits
2163 // are demanded, turn this into an unsigned shift right.
2164 if (Known.Zero[BitWidth - ShAmt - 1] ||
2165 DemandedBits.countl_zero() >= ShAmt) {
2166 SDNodeFlags Flags;
2167 Flags.setExact(Op->getFlags().hasExact());
2168 return TLO.CombineTo(
2169 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2170 }
2171
2172 int Log2 = DemandedBits.exactLogBase2();
2173 if (Log2 >= 0) {
2174 // The bit must come from the sign.
2175 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2176 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2177 }
2178
2179 if (Known.One[BitWidth - ShAmt - 1])
2180 // New bits are known one.
2181 Known.One.setHighBits(ShAmt);
2182
2183 // Attempt to avoid multi-use ops if we don't need anything from them.
2184 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2186 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2187 if (DemandedOp0) {
2188 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2189 return TLO.CombineTo(Op, NewOp);
2190 }
2191 }
2192 }
2193
2194 // Try to match AVG patterns (after shift simplification).
2195 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2196 DemandedElts, Depth + 1))
2197 return TLO.CombineTo(Op, AVG);
2198
2199 break;
2200 }
2201 case ISD::FSHL:
2202 case ISD::FSHR: {
2203 SDValue Op0 = Op.getOperand(0);
2204 SDValue Op1 = Op.getOperand(1);
2205 SDValue Op2 = Op.getOperand(2);
2206 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2207
2208 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2209 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2210
2211 // For fshl, 0-shift returns the 1st arg.
2212 // For fshr, 0-shift returns the 2nd arg.
2213 if (Amt == 0) {
2214 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2215 Known, TLO, Depth + 1))
2216 return true;
2217 break;
2218 }
2219
2220 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2221 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2222 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2223 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2224 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2225 Depth + 1))
2226 return true;
2227 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2228 Depth + 1))
2229 return true;
2230
2231 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2232 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2233 Known = Known.unionWith(Known2);
2234
2235 // Attempt to avoid multi-use ops if we don't need anything from them.
2236 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2237 !DemandedElts.isAllOnes()) {
2239 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2241 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2242 if (DemandedOp0 || DemandedOp1) {
2243 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2244 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2245 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2246 DemandedOp1, Op2);
2247 return TLO.CombineTo(Op, NewOp);
2248 }
2249 }
2250 }
2251
2252 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2253 if (isPowerOf2_32(BitWidth)) {
2254 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2255 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2256 Known2, TLO, Depth + 1))
2257 return true;
2258 }
2259 break;
2260 }
2261 case ISD::ROTL:
2262 case ISD::ROTR: {
2263 SDValue Op0 = Op.getOperand(0);
2264 SDValue Op1 = Op.getOperand(1);
2265 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2266
2267 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2268 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2269 return TLO.CombineTo(Op, Op0);
2270
2271 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2272 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2273 unsigned RevAmt = BitWidth - Amt;
2274
2275 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2276 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2277 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2278 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2279 Depth + 1))
2280 return true;
2281
2282 // rot*(x, 0) --> x
2283 if (Amt == 0)
2284 return TLO.CombineTo(Op, Op0);
2285
2286 // See if we don't demand either half of the rotated bits.
2287 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2288 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2289 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2290 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2291 }
2292 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2293 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2294 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2295 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2296 }
2297 }
2298
2299 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2300 if (isPowerOf2_32(BitWidth)) {
2301 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2302 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2303 Depth + 1))
2304 return true;
2305 }
2306 break;
2307 }
2308 case ISD::SMIN:
2309 case ISD::SMAX:
2310 case ISD::UMIN:
2311 case ISD::UMAX: {
2312 unsigned Opc = Op.getOpcode();
2313 SDValue Op0 = Op.getOperand(0);
2314 SDValue Op1 = Op.getOperand(1);
2315
2316 // If we're only demanding signbits, then we can simplify to OR/AND node.
2317 unsigned BitOp =
2318 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2319 unsigned NumSignBits =
2320 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2321 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2322 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2323 if (NumSignBits >= NumDemandedUpperBits)
2324 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2325
2326 // Check if one arg is always less/greater than (or equal) to the other arg.
2327 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2328 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2329 switch (Opc) {
2330 case ISD::SMIN:
2331 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2332 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2333 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2334 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2335 Known = KnownBits::smin(Known0, Known1);
2336 break;
2337 case ISD::SMAX:
2338 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2339 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2340 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2341 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2342 Known = KnownBits::smax(Known0, Known1);
2343 break;
2344 case ISD::UMIN:
2345 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2346 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2347 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2348 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2349 Known = KnownBits::umin(Known0, Known1);
2350 break;
2351 case ISD::UMAX:
2352 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2353 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2354 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2355 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2356 Known = KnownBits::umax(Known0, Known1);
2357 break;
2358 }
2359 break;
2360 }
2361 case ISD::BITREVERSE: {
2362 SDValue Src = Op.getOperand(0);
2363 APInt DemandedSrcBits = DemandedBits.reverseBits();
2364 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2365 Depth + 1))
2366 return true;
2367 Known = Known2.reverseBits();
2368 break;
2369 }
2370 case ISD::BSWAP: {
2371 SDValue Src = Op.getOperand(0);
2372
2373 // If the only bits demanded come from one byte of the bswap result,
2374 // just shift the input byte into position to eliminate the bswap.
2375 unsigned NLZ = DemandedBits.countl_zero();
2376 unsigned NTZ = DemandedBits.countr_zero();
2377
2378 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2379 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2380 // have 14 leading zeros, round to 8.
2381 NLZ = alignDown(NLZ, 8);
2382 NTZ = alignDown(NTZ, 8);
2383 // If we need exactly one byte, we can do this transformation.
2384 if (BitWidth - NLZ - NTZ == 8) {
2385 // Replace this with either a left or right shift to get the byte into
2386 // the right place.
2387 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2388 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2389 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2390 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2391 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2392 return TLO.CombineTo(Op, NewOp);
2393 }
2394 }
2395
2396 APInt DemandedSrcBits = DemandedBits.byteSwap();
2397 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2398 Depth + 1))
2399 return true;
2400 Known = Known2.byteSwap();
2401 break;
2402 }
2403 case ISD::CTPOP: {
2404 // If only 1 bit is demanded, replace with PARITY as long as we're before
2405 // op legalization.
2406 // FIXME: Limit to scalars for now.
2407 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2408 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2409 Op.getOperand(0)));
2410
2411 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2412 break;
2413 }
2415 SDValue Op0 = Op.getOperand(0);
2416 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2417 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2418
2419 // If we only care about the highest bit, don't bother shifting right.
2420 if (DemandedBits.isSignMask()) {
2421 unsigned MinSignedBits =
2422 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2423 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2424 // However if the input is already sign extended we expect the sign
2425 // extension to be dropped altogether later and do not simplify.
2426 if (!AlreadySignExtended) {
2427 // Compute the correct shift amount type, which must be getShiftAmountTy
2428 // for scalar types after legalization.
2429 SDValue ShiftAmt =
2430 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2431 return TLO.CombineTo(Op,
2432 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2433 }
2434 }
2435
2436 // If none of the extended bits are demanded, eliminate the sextinreg.
2437 if (DemandedBits.getActiveBits() <= ExVTBits)
2438 return TLO.CombineTo(Op, Op0);
2439
2440 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2441
2442 // Since the sign extended bits are demanded, we know that the sign
2443 // bit is demanded.
2444 InputDemandedBits.setBit(ExVTBits - 1);
2445
2446 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2447 Depth + 1))
2448 return true;
2449
2450 // If the sign bit of the input is known set or clear, then we know the
2451 // top bits of the result.
2452
2453 // If the input sign bit is known zero, convert this into a zero extension.
2454 if (Known.Zero[ExVTBits - 1])
2455 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2456
2457 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2458 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2459 Known.One.setBitsFrom(ExVTBits);
2460 Known.Zero &= Mask;
2461 } else { // Input sign bit unknown
2462 Known.Zero &= Mask;
2463 Known.One &= Mask;
2464 }
2465 break;
2466 }
2467 case ISD::BUILD_PAIR: {
2468 EVT HalfVT = Op.getOperand(0).getValueType();
2469 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2470
2471 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2472 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2473
2474 KnownBits KnownLo, KnownHi;
2475
2476 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2477 return true;
2478
2479 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2480 return true;
2481
2482 Known = KnownHi.concat(KnownLo);
2483 break;
2484 }
2486 if (VT.isScalableVector())
2487 return false;
2488 [[fallthrough]];
2489 case ISD::ZERO_EXTEND: {
2490 SDValue Src = Op.getOperand(0);
2491 EVT SrcVT = Src.getValueType();
2492 unsigned InBits = SrcVT.getScalarSizeInBits();
2493 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2494 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2495
2496 // If none of the top bits are demanded, convert this into an any_extend.
2497 if (DemandedBits.getActiveBits() <= InBits) {
2498 // If we only need the non-extended bits of the bottom element
2499 // then we can just bitcast to the result.
2500 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2501 VT.getSizeInBits() == SrcVT.getSizeInBits())
2502 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2503
2504 unsigned Opc =
2506 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2507 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2508 }
2509
2510 APInt InDemandedBits = DemandedBits.trunc(InBits);
2511 APInt InDemandedElts = DemandedElts.zext(InElts);
2512 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2513 Depth + 1)) {
2514 Op->dropFlags(SDNodeFlags::NonNeg);
2515 return true;
2516 }
2517 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2518 Known = Known.zext(BitWidth);
2519
2520 // Attempt to avoid multi-use ops if we don't need anything from them.
2522 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2523 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2524 break;
2525 }
2527 if (VT.isScalableVector())
2528 return false;
2529 [[fallthrough]];
2530 case ISD::SIGN_EXTEND: {
2531 SDValue Src = Op.getOperand(0);
2532 EVT SrcVT = Src.getValueType();
2533 unsigned InBits = SrcVT.getScalarSizeInBits();
2534 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2535 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2536
2537 APInt InDemandedElts = DemandedElts.zext(InElts);
2538 APInt InDemandedBits = DemandedBits.trunc(InBits);
2539
2540 // Since some of the sign extended bits are demanded, we know that the sign
2541 // bit is demanded.
2542 InDemandedBits.setBit(InBits - 1);
2543
2544 // If none of the top bits are demanded, convert this into an any_extend.
2545 if (DemandedBits.getActiveBits() <= InBits) {
2546 // If we only need the non-extended bits of the bottom element
2547 // then we can just bitcast to the result.
2548 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2549 VT.getSizeInBits() == SrcVT.getSizeInBits())
2550 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2551
2552 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2554 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2555 InBits) {
2556 unsigned Opc =
2558 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2559 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2560 }
2561 }
2562
2563 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2564 Depth + 1))
2565 return true;
2566 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2567
2568 // If the sign bit is known one, the top bits match.
2569 Known = Known.sext(BitWidth);
2570
2571 // If the sign bit is known zero, convert this to a zero extend.
2572 if (Known.isNonNegative()) {
2573 unsigned Opc =
2575 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2576 SDNodeFlags Flags;
2577 if (!IsVecInReg)
2578 Flags |= SDNodeFlags::NonNeg;
2579 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2580 }
2581 }
2582
2583 // Attempt to avoid multi-use ops if we don't need anything from them.
2585 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2586 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2587 break;
2588 }
2590 if (VT.isScalableVector())
2591 return false;
2592 [[fallthrough]];
2593 case ISD::ANY_EXTEND: {
2594 SDValue Src = Op.getOperand(0);
2595 EVT SrcVT = Src.getValueType();
2596 unsigned InBits = SrcVT.getScalarSizeInBits();
2597 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2598 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2599
2600 // If we only need the bottom element then we can just bitcast.
2601 // TODO: Handle ANY_EXTEND?
2602 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2603 VT.getSizeInBits() == SrcVT.getSizeInBits())
2604 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2605
2606 APInt InDemandedBits = DemandedBits.trunc(InBits);
2607 APInt InDemandedElts = DemandedElts.zext(InElts);
2608 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2609 Depth + 1))
2610 return true;
2611 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2612 Known = Known.anyext(BitWidth);
2613
2614 // Attempt to avoid multi-use ops if we don't need anything from them.
2616 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2617 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2618 break;
2619 }
2620 case ISD::TRUNCATE: {
2621 SDValue Src = Op.getOperand(0);
2622
2623 // Simplify the input, using demanded bit information, and compute the known
2624 // zero/one bits live out.
2625 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2626 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2627 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2628 Depth + 1)) {
2629 // Disable the nsw and nuw flags. We can no longer guarantee that we
2630 // won't wrap after simplification.
2631 Op->dropFlags(SDNodeFlags::NoWrap);
2632 return true;
2633 }
2634 Known = Known.trunc(BitWidth);
2635
2636 // Attempt to avoid multi-use ops if we don't need anything from them.
2638 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2639 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2640
2641 // If the input is only used by this truncate, see if we can shrink it based
2642 // on the known demanded bits.
2643 switch (Src.getOpcode()) {
2644 default:
2645 break;
2646 case ISD::SRL:
2647 // Shrink SRL by a constant if none of the high bits shifted in are
2648 // demanded.
2649 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2650 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2651 // undesirable.
2652 break;
2653
2654 if (Src.getNode()->hasOneUse()) {
2655 if (isTruncateFree(Src, VT) &&
2656 !isTruncateFree(Src.getValueType(), VT)) {
2657 // If truncate is only free at trunc(srl), do not turn it into
2658 // srl(trunc). The check is done by first check the truncate is free
2659 // at Src's opcode(srl), then check the truncate is not done by
2660 // referencing sub-register. In test, if both trunc(srl) and
2661 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2662 // trunc(srl)'s trunc is free, trunc(srl) is better.
2663 break;
2664 }
2665
2666 std::optional<unsigned> ShAmtC =
2667 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2668 if (!ShAmtC || *ShAmtC >= BitWidth)
2669 break;
2670 unsigned ShVal = *ShAmtC;
2671
2672 APInt HighBits =
2673 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2674 HighBits.lshrInPlace(ShVal);
2675 HighBits = HighBits.trunc(BitWidth);
2676 if (!(HighBits & DemandedBits)) {
2677 // None of the shifted in bits are needed. Add a truncate of the
2678 // shift input, then shift it.
2679 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2680 SDValue NewTrunc =
2681 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2682 return TLO.CombineTo(
2683 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2684 }
2685 }
2686 break;
2687 }
2688
2689 break;
2690 }
2691 case ISD::AssertZext: {
2692 // AssertZext demands all of the high bits, plus any of the low bits
2693 // demanded by its users.
2694 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2696 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2697 TLO, Depth + 1))
2698 return true;
2699
2700 Known.Zero |= ~InMask;
2701 Known.One &= (~Known.Zero);
2702 break;
2703 }
2705 SDValue Src = Op.getOperand(0);
2706 SDValue Idx = Op.getOperand(1);
2707 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2708 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2709
2710 if (SrcEltCnt.isScalable())
2711 return false;
2712
2713 // Demand the bits from every vector element without a constant index.
2714 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2715 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2716 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2717 if (CIdx->getAPIntValue().ult(NumSrcElts))
2718 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2719
2720 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2721 // anything about the extended bits.
2722 APInt DemandedSrcBits = DemandedBits;
2723 if (BitWidth > EltBitWidth)
2724 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2725
2726 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2727 Depth + 1))
2728 return true;
2729
2730 // Attempt to avoid multi-use ops if we don't need anything from them.
2731 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2732 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2733 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2734 SDValue NewOp =
2735 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2736 return TLO.CombineTo(Op, NewOp);
2737 }
2738 }
2739
2740 Known = Known2;
2741 if (BitWidth > EltBitWidth)
2742 Known = Known.anyext(BitWidth);
2743 break;
2744 }
2745 case ISD::BITCAST: {
2746 if (VT.isScalableVector())
2747 return false;
2748 SDValue Src = Op.getOperand(0);
2749 EVT SrcVT = Src.getValueType();
2750 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2751
2752 // If this is an FP->Int bitcast and if the sign bit is the only
2753 // thing demanded, turn this into a FGETSIGN.
2754 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2755 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2756 SrcVT.isFloatingPoint()) {
2757 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2758 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2759 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2760 SrcVT != MVT::f128) {
2761 // Cannot eliminate/lower SHL for f128 yet.
2762 EVT Ty = OpVTLegal ? VT : MVT::i32;
2763 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2764 // place. We expect the SHL to be eliminated by other optimizations.
2765 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2766 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2767 if (!OpVTLegal && OpVTSizeInBits > 32)
2768 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2769 unsigned ShVal = Op.getValueSizeInBits() - 1;
2770 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2771 return TLO.CombineTo(Op,
2772 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2773 }
2774 }
2775
2776 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2777 // Demand the elt/bit if any of the original elts/bits are demanded.
2778 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2779 unsigned Scale = BitWidth / NumSrcEltBits;
2780 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2781 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2782 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2783 for (unsigned i = 0; i != Scale; ++i) {
2784 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2785 unsigned BitOffset = EltOffset * NumSrcEltBits;
2786 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2787 if (!Sub.isZero()) {
2788 DemandedSrcBits |= Sub;
2789 for (unsigned j = 0; j != NumElts; ++j)
2790 if (DemandedElts[j])
2791 DemandedSrcElts.setBit((j * Scale) + i);
2792 }
2793 }
2794
2795 APInt KnownSrcUndef, KnownSrcZero;
2796 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2797 KnownSrcZero, TLO, Depth + 1))
2798 return true;
2799
2800 KnownBits KnownSrcBits;
2801 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2802 KnownSrcBits, TLO, Depth + 1))
2803 return true;
2804 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2805 // TODO - bigendian once we have test coverage.
2806 unsigned Scale = NumSrcEltBits / BitWidth;
2807 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2808 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2809 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2810 for (unsigned i = 0; i != NumElts; ++i)
2811 if (DemandedElts[i]) {
2812 unsigned Offset = (i % Scale) * BitWidth;
2813 DemandedSrcBits.insertBits(DemandedBits, Offset);
2814 DemandedSrcElts.setBit(i / Scale);
2815 }
2816
2817 if (SrcVT.isVector()) {
2818 APInt KnownSrcUndef, KnownSrcZero;
2819 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2820 KnownSrcZero, TLO, Depth + 1))
2821 return true;
2822 }
2823
2824 KnownBits KnownSrcBits;
2825 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2826 KnownSrcBits, TLO, Depth + 1))
2827 return true;
2828
2829 // Attempt to avoid multi-use ops if we don't need anything from them.
2830 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2831 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2832 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2833 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2834 return TLO.CombineTo(Op, NewOp);
2835 }
2836 }
2837 }
2838
2839 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2840 // recursive call where Known may be useful to the caller.
2841 if (Depth > 0) {
2842 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2843 return false;
2844 }
2845 break;
2846 }
2847 case ISD::MUL:
2848 if (DemandedBits.isPowerOf2()) {
2849 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2850 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2851 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2852 unsigned CTZ = DemandedBits.countr_zero();
2853 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2854 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2855 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2856 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2857 return TLO.CombineTo(Op, Shl);
2858 }
2859 }
2860 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2861 // X * X is odd iff X is odd.
2862 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2863 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2864 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2865 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2866 return TLO.CombineTo(Op, And1);
2867 }
2868 [[fallthrough]];
2869 case ISD::PTRADD:
2870 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2871 break;
2872 // PTRADD behaves like ADD if pointers are represented as integers.
2873 [[fallthrough]];
2874 case ISD::ADD:
2875 case ISD::SUB: {
2876 // Add, Sub, and Mul don't demand any bits in positions beyond that
2877 // of the highest bit demanded of them.
2878 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2879 SDNodeFlags Flags = Op.getNode()->getFlags();
2880 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2881 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2882 KnownBits KnownOp0, KnownOp1;
2883 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2884 const KnownBits &KnownRHS) {
2885 if (Op.getOpcode() == ISD::MUL)
2886 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2887 return Demanded;
2888 };
2889 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2890 Depth + 1) ||
2891 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2892 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2893 // See if the operation should be performed at a smaller bit width.
2895 // Disable the nsw and nuw flags. We can no longer guarantee that we
2896 // won't wrap after simplification.
2897 Op->dropFlags(SDNodeFlags::NoWrap);
2898 return true;
2899 }
2900
2901 // neg x with only low bit demanded is simply x.
2902 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2903 isNullConstant(Op0))
2904 return TLO.CombineTo(Op, Op1);
2905
2906 // Attempt to avoid multi-use ops if we don't need anything from them.
2907 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2909 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2911 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2912 if (DemandedOp0 || DemandedOp1) {
2913 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2914 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2915 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2916 Flags & ~SDNodeFlags::NoWrap);
2917 return TLO.CombineTo(Op, NewOp);
2918 }
2919 }
2920
2921 // If we have a constant operand, we may be able to turn it into -1 if we
2922 // do not demand the high bits. This can make the constant smaller to
2923 // encode, allow more general folding, or match specialized instruction
2924 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2925 // is probably not useful (and could be detrimental).
2927 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2928 if (C && !C->isAllOnes() && !C->isOne() &&
2929 (C->getAPIntValue() | HighMask).isAllOnes()) {
2930 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2931 // Disable the nsw and nuw flags. We can no longer guarantee that we
2932 // won't wrap after simplification.
2933 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2934 Flags & ~SDNodeFlags::NoWrap);
2935 return TLO.CombineTo(Op, NewOp);
2936 }
2937
2938 // Match a multiply with a disguised negated-power-of-2 and convert to a
2939 // an equivalent shift-left amount.
2940 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2941 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2942 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2943 return 0;
2944
2945 // Don't touch opaque constants. Also, ignore zero and power-of-2
2946 // multiplies. Those will get folded later.
2947 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2948 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2949 !MulC->getAPIntValue().isPowerOf2()) {
2950 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2951 if (UnmaskedC.isNegatedPowerOf2())
2952 return (-UnmaskedC).logBase2();
2953 }
2954 return 0;
2955 };
2956
2957 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2958 unsigned ShlAmt) {
2959 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2960 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2961 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2962 return TLO.CombineTo(Op, Res);
2963 };
2964
2966 if (Op.getOpcode() == ISD::ADD) {
2967 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2968 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2969 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2970 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2971 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2972 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2973 }
2974 if (Op.getOpcode() == ISD::SUB) {
2975 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2976 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2977 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2978 }
2979 }
2980
2981 if (Op.getOpcode() == ISD::MUL) {
2982 Known = KnownBits::mul(KnownOp0, KnownOp1);
2983 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2985 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2986 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2987 }
2988 break;
2989 }
2990 case ISD::FABS: {
2991 SDValue Op0 = Op.getOperand(0);
2992 APInt SignMask = APInt::getSignMask(BitWidth);
2993
2994 if (!DemandedBits.intersects(SignMask))
2995 return TLO.CombineTo(Op, Op0);
2996
2997 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2998 Depth + 1))
2999 return true;
3000
3001 if (Known.isNonNegative())
3002 return TLO.CombineTo(Op, Op0);
3003 if (Known.isNegative())
3004 return TLO.CombineTo(
3005 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3006
3007 Known.Zero |= SignMask;
3008 Known.One &= ~SignMask;
3009
3010 break;
3011 }
3012 case ISD::FCOPYSIGN: {
3013 SDValue Op0 = Op.getOperand(0);
3014 SDValue Op1 = Op.getOperand(1);
3015
3016 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3017 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3018 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3019 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3020
3021 if (!DemandedBits.intersects(SignMask0))
3022 return TLO.CombineTo(Op, Op0);
3023
3024 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3025 Known, TLO, Depth + 1) ||
3026 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3027 Depth + 1))
3028 return true;
3029
3030 if (Known2.isNonNegative())
3031 return TLO.CombineTo(
3032 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3033
3034 if (Known2.isNegative())
3035 return TLO.CombineTo(
3036 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3037 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3038
3039 Known.Zero &= ~SignMask0;
3040 Known.One &= ~SignMask0;
3041 break;
3042 }
3043 case ISD::FNEG: {
3044 SDValue Op0 = Op.getOperand(0);
3045 APInt SignMask = APInt::getSignMask(BitWidth);
3046
3047 if (!DemandedBits.intersects(SignMask))
3048 return TLO.CombineTo(Op, Op0);
3049
3050 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3051 Depth + 1))
3052 return true;
3053
3054 if (!Known.isSignUnknown()) {
3055 Known.Zero ^= SignMask;
3056 Known.One ^= SignMask;
3057 }
3058
3059 break;
3060 }
3061 default:
3062 // We also ask the target about intrinsics (which could be specific to it).
3063 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3064 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3065 // TODO: Probably okay to remove after audit; here to reduce change size
3066 // in initial enablement patch for scalable vectors
3067 if (Op.getValueType().isScalableVector())
3068 break;
3070 Known, TLO, Depth))
3071 return true;
3072 break;
3073 }
3074
3075 // Just use computeKnownBits to compute output bits.
3076 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3077 break;
3078 }
3079
3080 // If we know the value of all of the demanded bits, return this as a
3081 // constant.
3083 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3084 // Avoid folding to a constant if any OpaqueConstant is involved.
3085 if (llvm::any_of(Op->ops(), [](SDValue V) {
3086 auto *C = dyn_cast<ConstantSDNode>(V);
3087 return C && C->isOpaque();
3088 }))
3089 return false;
3090 if (VT.isInteger())
3091 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3092 if (VT.isFloatingPoint())
3093 return TLO.CombineTo(
3094 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3095 dl, VT));
3096 }
3097
3098 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3099 // Try again just for the original demanded elts.
3100 // Ensure we do this AFTER constant folding above.
3101 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3102 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3103
3104 return false;
3105}
3106
3108 const APInt &DemandedElts,
3109 DAGCombinerInfo &DCI) const {
3110 SelectionDAG &DAG = DCI.DAG;
3111 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3112 !DCI.isBeforeLegalizeOps());
3113
3114 APInt KnownUndef, KnownZero;
3115 bool Simplified =
3116 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3117 if (Simplified) {
3118 DCI.AddToWorklist(Op.getNode());
3119 DCI.CommitTargetLoweringOpt(TLO);
3120 }
3121
3122 return Simplified;
3123}
3124
3125/// Given a vector binary operation and known undefined elements for each input
3126/// operand, compute whether each element of the output is undefined.
3128 const APInt &UndefOp0,
3129 const APInt &UndefOp1) {
3130 EVT VT = BO.getValueType();
3132 "Vector binop only");
3133
3134 EVT EltVT = VT.getVectorElementType();
3135 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3136 assert(UndefOp0.getBitWidth() == NumElts &&
3137 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3138
3139 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3140 const APInt &UndefVals) {
3141 if (UndefVals[Index])
3142 return DAG.getUNDEF(EltVT);
3143
3144 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3145 // Try hard to make sure that the getNode() call is not creating temporary
3146 // nodes. Ignore opaque integers because they do not constant fold.
3147 SDValue Elt = BV->getOperand(Index);
3148 auto *C = dyn_cast<ConstantSDNode>(Elt);
3149 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3150 return Elt;
3151 }
3152
3153 return SDValue();
3154 };
3155
3156 APInt KnownUndef = APInt::getZero(NumElts);
3157 for (unsigned i = 0; i != NumElts; ++i) {
3158 // If both inputs for this element are either constant or undef and match
3159 // the element type, compute the constant/undef result for this element of
3160 // the vector.
3161 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3162 // not handle FP constants. The code within getNode() should be refactored
3163 // to avoid the danger of creating a bogus temporary node here.
3164 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3165 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3166 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3167 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3168 KnownUndef.setBit(i);
3169 }
3170 return KnownUndef;
3171}
3172
3174 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3175 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3176 bool AssumeSingleUse) const {
3177 EVT VT = Op.getValueType();
3178 unsigned Opcode = Op.getOpcode();
3179 APInt DemandedElts = OriginalDemandedElts;
3180 unsigned NumElts = DemandedElts.getBitWidth();
3181 assert(VT.isVector() && "Expected vector op");
3182
3183 KnownUndef = KnownZero = APInt::getZero(NumElts);
3184
3186 return false;
3187
3188 // TODO: For now we assume we know nothing about scalable vectors.
3189 if (VT.isScalableVector())
3190 return false;
3191
3192 assert(VT.getVectorNumElements() == NumElts &&
3193 "Mask size mismatches value type element count!");
3194
3195 // Undef operand.
3196 if (Op.isUndef()) {
3197 KnownUndef.setAllBits();
3198 return false;
3199 }
3200
3201 // If Op has other users, assume that all elements are needed.
3202 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3203 DemandedElts.setAllBits();
3204
3205 // Not demanding any elements from Op.
3206 if (DemandedElts == 0) {
3207 KnownUndef.setAllBits();
3208 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3209 }
3210
3211 // Limit search depth.
3213 return false;
3214
3215 SDLoc DL(Op);
3216 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3217 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3218
3219 // Helper for demanding the specified elements and all the bits of both binary
3220 // operands.
3221 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3222 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3223 TLO.DAG, Depth + 1);
3224 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3225 TLO.DAG, Depth + 1);
3226 if (NewOp0 || NewOp1) {
3227 SDValue NewOp =
3228 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3229 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3230 return TLO.CombineTo(Op, NewOp);
3231 }
3232 return false;
3233 };
3234
3235 switch (Opcode) {
3236 case ISD::SCALAR_TO_VECTOR: {
3237 if (!DemandedElts[0]) {
3238 KnownUndef.setAllBits();
3239 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3240 }
3241 KnownUndef.setHighBits(NumElts - 1);
3242 break;
3243 }
3244 case ISD::BITCAST: {
3245 SDValue Src = Op.getOperand(0);
3246 EVT SrcVT = Src.getValueType();
3247
3248 if (!SrcVT.isVector()) {
3249 // TODO - bigendian once we have test coverage.
3250 if (IsLE) {
3251 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3252 unsigned EltSize = VT.getScalarSizeInBits();
3253 for (unsigned I = 0; I != NumElts; ++I) {
3254 if (DemandedElts[I]) {
3255 unsigned Offset = I * EltSize;
3256 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3257 }
3258 }
3259 KnownBits Known;
3260 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3261 return true;
3262 }
3263 break;
3264 }
3265
3266 // Fast handling of 'identity' bitcasts.
3267 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3268 if (NumSrcElts == NumElts)
3269 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3270 KnownZero, TLO, Depth + 1);
3271
3272 APInt SrcDemandedElts, SrcZero, SrcUndef;
3273
3274 // Bitcast from 'large element' src vector to 'small element' vector, we
3275 // must demand a source element if any DemandedElt maps to it.
3276 if ((NumElts % NumSrcElts) == 0) {
3277 unsigned Scale = NumElts / NumSrcElts;
3278 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3279 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3280 TLO, Depth + 1))
3281 return true;
3282
3283 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3284 // of the large element.
3285 // TODO - bigendian once we have test coverage.
3286 if (IsLE) {
3287 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3288 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3289 for (unsigned i = 0; i != NumElts; ++i)
3290 if (DemandedElts[i]) {
3291 unsigned Ofs = (i % Scale) * EltSizeInBits;
3292 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3293 }
3294
3295 KnownBits Known;
3296 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3297 TLO, Depth + 1))
3298 return true;
3299
3300 // The bitcast has split each wide element into a number of
3301 // narrow subelements. We have just computed the Known bits
3302 // for wide elements. See if element splitting results in
3303 // some subelements being zero. Only for demanded elements!
3304 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3305 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3306 .isAllOnes())
3307 continue;
3308 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3309 unsigned Elt = Scale * SrcElt + SubElt;
3310 if (DemandedElts[Elt])
3311 KnownZero.setBit(Elt);
3312 }
3313 }
3314 }
3315
3316 // If the src element is zero/undef then all the output elements will be -
3317 // only demanded elements are guaranteed to be correct.
3318 for (unsigned i = 0; i != NumSrcElts; ++i) {
3319 if (SrcDemandedElts[i]) {
3320 if (SrcZero[i])
3321 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3322 if (SrcUndef[i])
3323 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3324 }
3325 }
3326 }
3327
3328 // Bitcast from 'small element' src vector to 'large element' vector, we
3329 // demand all smaller source elements covered by the larger demanded element
3330 // of this vector.
3331 if ((NumSrcElts % NumElts) == 0) {
3332 unsigned Scale = NumSrcElts / NumElts;
3333 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3334 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3335 TLO, Depth + 1))
3336 return true;
3337
3338 // If all the src elements covering an output element are zero/undef, then
3339 // the output element will be as well, assuming it was demanded.
3340 for (unsigned i = 0; i != NumElts; ++i) {
3341 if (DemandedElts[i]) {
3342 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3343 KnownZero.setBit(i);
3344 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3345 KnownUndef.setBit(i);
3346 }
3347 }
3348 }
3349 break;
3350 }
3351 case ISD::FREEZE: {
3352 SDValue N0 = Op.getOperand(0);
3353 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3354 /*PoisonOnly=*/false,
3355 Depth + 1))
3356 return TLO.CombineTo(Op, N0);
3357
3358 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3359 // freeze(op(x, ...)) -> op(freeze(x), ...).
3360 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3361 return TLO.CombineTo(
3363 TLO.DAG.getFreeze(N0.getOperand(0))));
3364 break;
3365 }
3366 case ISD::BUILD_VECTOR: {
3367 // Check all elements and simplify any unused elements with UNDEF.
3368 if (!DemandedElts.isAllOnes()) {
3369 // Don't simplify BROADCASTS.
3370 if (llvm::any_of(Op->op_values(),
3371 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3373 bool Updated = false;
3374 for (unsigned i = 0; i != NumElts; ++i) {
3375 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3376 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3377 KnownUndef.setBit(i);
3378 Updated = true;
3379 }
3380 }
3381 if (Updated)
3382 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3383 }
3384 }
3385 for (unsigned i = 0; i != NumElts; ++i) {
3386 SDValue SrcOp = Op.getOperand(i);
3387 if (SrcOp.isUndef()) {
3388 KnownUndef.setBit(i);
3389 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3391 KnownZero.setBit(i);
3392 }
3393 }
3394 break;
3395 }
3396 case ISD::CONCAT_VECTORS: {
3397 EVT SubVT = Op.getOperand(0).getValueType();
3398 unsigned NumSubVecs = Op.getNumOperands();
3399 unsigned NumSubElts = SubVT.getVectorNumElements();
3400 for (unsigned i = 0; i != NumSubVecs; ++i) {
3401 SDValue SubOp = Op.getOperand(i);
3402 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3403 APInt SubUndef, SubZero;
3404 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3405 Depth + 1))
3406 return true;
3407 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3408 KnownZero.insertBits(SubZero, i * NumSubElts);
3409 }
3410
3411 // Attempt to avoid multi-use ops if we don't need anything from them.
3412 if (!DemandedElts.isAllOnes()) {
3413 bool FoundNewSub = false;
3414 SmallVector<SDValue, 2> DemandedSubOps;
3415 for (unsigned i = 0; i != NumSubVecs; ++i) {
3416 SDValue SubOp = Op.getOperand(i);
3417 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3419 SubOp, SubElts, TLO.DAG, Depth + 1);
3420 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3421 FoundNewSub = NewSubOp ? true : FoundNewSub;
3422 }
3423 if (FoundNewSub) {
3424 SDValue NewOp =
3425 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3426 return TLO.CombineTo(Op, NewOp);
3427 }
3428 }
3429 break;
3430 }
3431 case ISD::INSERT_SUBVECTOR: {
3432 // Demand any elements from the subvector and the remainder from the src it
3433 // is inserted into.
3434 SDValue Src = Op.getOperand(0);
3435 SDValue Sub = Op.getOperand(1);
3436 uint64_t Idx = Op.getConstantOperandVal(2);
3437 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3438 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3439 APInt DemandedSrcElts = DemandedElts;
3440 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3441
3442 // If none of the sub operand elements are demanded, bypass the insert.
3443 if (!DemandedSubElts)
3444 return TLO.CombineTo(Op, Src);
3445
3446 APInt SubUndef, SubZero;
3447 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3448 Depth + 1))
3449 return true;
3450
3451 // If none of the src operand elements are demanded, replace it with undef.
3452 if (!DemandedSrcElts && !Src.isUndef())
3453 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3454 TLO.DAG.getUNDEF(VT), Sub,
3455 Op.getOperand(2)));
3456
3457 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3458 TLO, Depth + 1))
3459 return true;
3460 KnownUndef.insertBits(SubUndef, Idx);
3461 KnownZero.insertBits(SubZero, Idx);
3462
3463 // Attempt to avoid multi-use ops if we don't need anything from them.
3464 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3466 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3468 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3469 if (NewSrc || NewSub) {
3470 NewSrc = NewSrc ? NewSrc : Src;
3471 NewSub = NewSub ? NewSub : Sub;
3472 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3473 NewSub, Op.getOperand(2));
3474 return TLO.CombineTo(Op, NewOp);
3475 }
3476 }
3477 break;
3478 }
3480 // Offset the demanded elts by the subvector index.
3481 SDValue Src = Op.getOperand(0);
3482 if (Src.getValueType().isScalableVector())
3483 break;
3484 uint64_t Idx = Op.getConstantOperandVal(1);
3485 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3486 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3487
3488 APInt SrcUndef, SrcZero;
3489 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3490 Depth + 1))
3491 return true;
3492 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3493 KnownZero = SrcZero.extractBits(NumElts, Idx);
3494
3495 // Attempt to avoid multi-use ops if we don't need anything from them.
3496 if (!DemandedElts.isAllOnes()) {
3498 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3499 if (NewSrc) {
3500 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3501 Op.getOperand(1));
3502 return TLO.CombineTo(Op, NewOp);
3503 }
3504 }
3505 break;
3506 }
3508 SDValue Vec = Op.getOperand(0);
3509 SDValue Scl = Op.getOperand(1);
3510 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3511
3512 // For a legal, constant insertion index, if we don't need this insertion
3513 // then strip it, else remove it from the demanded elts.
3514 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3515 unsigned Idx = CIdx->getZExtValue();
3516 if (!DemandedElts[Idx])
3517 return TLO.CombineTo(Op, Vec);
3518
3519 APInt DemandedVecElts(DemandedElts);
3520 DemandedVecElts.clearBit(Idx);
3521 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3522 KnownZero, TLO, Depth + 1))
3523 return true;
3524
3525 KnownUndef.setBitVal(Idx, Scl.isUndef());
3526
3527 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3528 break;
3529 }
3530
3531 APInt VecUndef, VecZero;
3532 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3533 Depth + 1))
3534 return true;
3535 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3536 break;
3537 }
3538 case ISD::VSELECT: {
3539 SDValue Sel = Op.getOperand(0);
3540 SDValue LHS = Op.getOperand(1);
3541 SDValue RHS = Op.getOperand(2);
3542
3543 // Try to transform the select condition based on the current demanded
3544 // elements.
3545 APInt UndefSel, ZeroSel;
3546 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3547 Depth + 1))
3548 return true;
3549
3550 // See if we can simplify either vselect operand.
3551 APInt DemandedLHS(DemandedElts);
3552 APInt DemandedRHS(DemandedElts);
3553 APInt UndefLHS, ZeroLHS;
3554 APInt UndefRHS, ZeroRHS;
3555 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3556 Depth + 1))
3557 return true;
3558 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3559 Depth + 1))
3560 return true;
3561
3562 KnownUndef = UndefLHS & UndefRHS;
3563 KnownZero = ZeroLHS & ZeroRHS;
3564
3565 // If we know that the selected element is always zero, we don't need the
3566 // select value element.
3567 APInt DemandedSel = DemandedElts & ~KnownZero;
3568 if (DemandedSel != DemandedElts)
3569 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3570 Depth + 1))
3571 return true;
3572
3573 break;
3574 }
3575 case ISD::VECTOR_SHUFFLE: {
3576 SDValue LHS = Op.getOperand(0);
3577 SDValue RHS = Op.getOperand(1);
3578 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3579
3580 // Collect demanded elements from shuffle operands..
3581 APInt DemandedLHS(NumElts, 0);
3582 APInt DemandedRHS(NumElts, 0);
3583 for (unsigned i = 0; i != NumElts; ++i) {
3584 int M = ShuffleMask[i];
3585 if (M < 0 || !DemandedElts[i])
3586 continue;
3587 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3588 if (M < (int)NumElts)
3589 DemandedLHS.setBit(M);
3590 else
3591 DemandedRHS.setBit(M - NumElts);
3592 }
3593
3594 // If either side isn't demanded, replace it by UNDEF. We handle this
3595 // explicitly here to also simplify in case of multiple uses (on the
3596 // contrary to the SimplifyDemandedVectorElts calls below).
3597 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3598 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3599 if (FoldLHS || FoldRHS) {
3600 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3601 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3602 SDValue NewOp =
3603 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3604 return TLO.CombineTo(Op, NewOp);
3605 }
3606
3607 // See if we can simplify either shuffle operand.
3608 APInt UndefLHS, ZeroLHS;
3609 APInt UndefRHS, ZeroRHS;
3610 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3611 Depth + 1))
3612 return true;
3613 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3614 Depth + 1))
3615 return true;
3616
3617 // Simplify mask using undef elements from LHS/RHS.
3618 bool Updated = false;
3619 bool IdentityLHS = true, IdentityRHS = true;
3620 SmallVector<int, 32> NewMask(ShuffleMask);
3621 for (unsigned i = 0; i != NumElts; ++i) {
3622 int &M = NewMask[i];
3623 if (M < 0)
3624 continue;
3625 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3626 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3627 Updated = true;
3628 M = -1;
3629 }
3630 IdentityLHS &= (M < 0) || (M == (int)i);
3631 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3632 }
3633
3634 // Update legal shuffle masks based on demanded elements if it won't reduce
3635 // to Identity which can cause premature removal of the shuffle mask.
3636 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3637 SDValue LegalShuffle =
3638 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3639 if (LegalShuffle)
3640 return TLO.CombineTo(Op, LegalShuffle);
3641 }
3642
3643 // Propagate undef/zero elements from LHS/RHS.
3644 for (unsigned i = 0; i != NumElts; ++i) {
3645 int M = ShuffleMask[i];
3646 if (M < 0) {
3647 KnownUndef.setBit(i);
3648 } else if (M < (int)NumElts) {
3649 if (UndefLHS[M])
3650 KnownUndef.setBit(i);
3651 if (ZeroLHS[M])
3652 KnownZero.setBit(i);
3653 } else {
3654 if (UndefRHS[M - NumElts])
3655 KnownUndef.setBit(i);
3656 if (ZeroRHS[M - NumElts])
3657 KnownZero.setBit(i);
3658 }
3659 }
3660 break;
3661 }
3665 APInt SrcUndef, SrcZero;
3666 SDValue Src = Op.getOperand(0);
3667 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3668 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3669 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3670 Depth + 1))
3671 return true;
3672 KnownZero = SrcZero.zextOrTrunc(NumElts);
3673 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3674
3675 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3676 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3677 DemandedSrcElts == 1) {
3678 // aext - if we just need the bottom element then we can bitcast.
3679 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3680 }
3681
3682 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3683 // zext(undef) upper bits are guaranteed to be zero.
3684 if (DemandedElts.isSubsetOf(KnownUndef))
3685 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3686 KnownUndef.clearAllBits();
3687
3688 // zext - if we just need the bottom element then we can mask:
3689 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3690 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3691 Op->isOnlyUserOf(Src.getNode()) &&
3692 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3693 SDLoc DL(Op);
3694 EVT SrcVT = Src.getValueType();
3695 EVT SrcSVT = SrcVT.getScalarType();
3696 SmallVector<SDValue> MaskElts;
3697 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3698 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3699 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3700 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3701 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3702 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3703 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3704 }
3705 }
3706 }
3707 break;
3708 }
3709
3710 // TODO: There are more binop opcodes that could be handled here - MIN,
3711 // MAX, saturated math, etc.
3712 case ISD::ADD: {
3713 SDValue Op0 = Op.getOperand(0);
3714 SDValue Op1 = Op.getOperand(1);
3715 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3716 APInt UndefLHS, ZeroLHS;
3717 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3718 Depth + 1, /*AssumeSingleUse*/ true))
3719 return true;
3720 }
3721 [[fallthrough]];
3722 }
3723 case ISD::AVGCEILS:
3724 case ISD::AVGCEILU:
3725 case ISD::AVGFLOORS:
3726 case ISD::AVGFLOORU:
3727 case ISD::OR:
3728 case ISD::XOR:
3729 case ISD::SUB:
3730 case ISD::FADD:
3731 case ISD::FSUB:
3732 case ISD::FMUL:
3733 case ISD::FDIV:
3734 case ISD::FREM: {
3735 SDValue Op0 = Op.getOperand(0);
3736 SDValue Op1 = Op.getOperand(1);
3737
3738 APInt UndefRHS, ZeroRHS;
3739 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3740 Depth + 1))
3741 return true;
3742 APInt UndefLHS, ZeroLHS;
3743 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3744 Depth + 1))
3745 return true;
3746
3747 KnownZero = ZeroLHS & ZeroRHS;
3748 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3749
3750 // Attempt to avoid multi-use ops if we don't need anything from them.
3751 // TODO - use KnownUndef to relax the demandedelts?
3752 if (!DemandedElts.isAllOnes())
3753 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3754 return true;
3755 break;
3756 }
3757 case ISD::SHL:
3758 case ISD::SRL:
3759 case ISD::SRA:
3760 case ISD::ROTL:
3761 case ISD::ROTR: {
3762 SDValue Op0 = Op.getOperand(0);
3763 SDValue Op1 = Op.getOperand(1);
3764
3765 APInt UndefRHS, ZeroRHS;
3766 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3767 Depth + 1))
3768 return true;
3769 APInt UndefLHS, ZeroLHS;
3770 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3771 Depth + 1))
3772 return true;
3773
3774 KnownZero = ZeroLHS;
3775 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3776
3777 // Attempt to avoid multi-use ops if we don't need anything from them.
3778 // TODO - use KnownUndef to relax the demandedelts?
3779 if (!DemandedElts.isAllOnes())
3780 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3781 return true;
3782 break;
3783 }
3784 case ISD::MUL:
3785 case ISD::MULHU:
3786 case ISD::MULHS:
3787 case ISD::AND: {
3788 SDValue Op0 = Op.getOperand(0);
3789 SDValue Op1 = Op.getOperand(1);
3790
3791 APInt SrcUndef, SrcZero;
3792 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3793 Depth + 1))
3794 return true;
3795 // If we know that a demanded element was zero in Op1 we don't need to
3796 // demand it in Op0 - its guaranteed to be zero.
3797 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3798 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3799 TLO, Depth + 1))
3800 return true;
3801
3802 KnownUndef &= DemandedElts0;
3803 KnownZero &= DemandedElts0;
3804
3805 // If every element pair has a zero/undef then just fold to zero.
3806 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3807 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3808 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3809 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3810
3811 // If either side has a zero element, then the result element is zero, even
3812 // if the other is an UNDEF.
3813 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3814 // and then handle 'and' nodes with the rest of the binop opcodes.
3815 KnownZero |= SrcZero;
3816 KnownUndef &= SrcUndef;
3817 KnownUndef &= ~KnownZero;
3818
3819 // Attempt to avoid multi-use ops if we don't need anything from them.
3820 if (!DemandedElts.isAllOnes())
3821 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3822 return true;
3823 break;
3824 }
3825 case ISD::TRUNCATE:
3826 case ISD::SIGN_EXTEND:
3827 case ISD::ZERO_EXTEND:
3828 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3829 KnownZero, TLO, Depth + 1))
3830 return true;
3831
3832 if (!DemandedElts.isAllOnes())
3834 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3835 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3836
3837 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3838 // zext(undef) upper bits are guaranteed to be zero.
3839 if (DemandedElts.isSubsetOf(KnownUndef))
3840 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3841 KnownUndef.clearAllBits();
3842 }
3843 break;
3844 case ISD::SINT_TO_FP:
3845 case ISD::UINT_TO_FP:
3846 case ISD::FP_TO_SINT:
3847 case ISD::FP_TO_UINT:
3848 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3849 KnownZero, TLO, Depth + 1))
3850 return true;
3851 // Don't fall through to generic undef -> undef handling.
3852 return false;
3853 default: {
3854 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3855 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3856 KnownZero, TLO, Depth))
3857 return true;
3858 } else {
3859 KnownBits Known;
3860 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3861 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3862 TLO, Depth, AssumeSingleUse))
3863 return true;
3864 }
3865 break;
3866 }
3867 }
3868 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3869
3870 // Constant fold all undef cases.
3871 // TODO: Handle zero cases as well.
3872 if (DemandedElts.isSubsetOf(KnownUndef))
3873 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3874
3875 return false;
3876}
3877
3878/// Determine which of the bits specified in Mask are known to be either zero or
3879/// one and return them in the Known.
3881 KnownBits &Known,
3882 const APInt &DemandedElts,
3883 const SelectionDAG &DAG,
3884 unsigned Depth) const {
3885 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3886 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3887 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3888 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3889 "Should use MaskedValueIsZero if you don't know whether Op"
3890 " is a target node!");
3891 Known.resetAll();
3892}
3893
3896 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3897 unsigned Depth) const {
3898 Known.resetAll();
3899}
3900
3903 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3904 unsigned Depth) const {
3905 Known.resetAll();
3906}
3907
3909 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3910 // The low bits are known zero if the pointer is aligned.
3911 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3912}
3913
3919
3920/// This method can be implemented by targets that want to expose additional
3921/// information about sign bits to the DAG Combiner.
3923 const APInt &,
3924 const SelectionDAG &,
3925 unsigned Depth) const {
3926 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3927 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3928 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3929 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3930 "Should use ComputeNumSignBits if you don't know whether Op"
3931 " is a target node!");
3932 return 1;
3933}
3934
3936 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3937 const MachineRegisterInfo &MRI, unsigned Depth) const {
3938 return 1;
3939}
3940
3942 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3943 TargetLoweringOpt &TLO, unsigned Depth) const {
3944 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3945 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3946 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3947 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3948 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3949 " is a target node!");
3950 return false;
3951}
3952
3954 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3955 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3956 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3957 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3958 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3959 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3960 "Should use SimplifyDemandedBits if you don't know whether Op"
3961 " is a target node!");
3962 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3963 return false;
3964}
3965
3967 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3968 SelectionDAG &DAG, unsigned Depth) const {
3969 assert(
3970 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3971 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3972 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3973 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3974 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3975 " is a target node!");
3976 return SDValue();
3977}
3978
3979SDValue
3982 SelectionDAG &DAG) const {
3983 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3984 if (!LegalMask) {
3985 std::swap(N0, N1);
3987 LegalMask = isShuffleMaskLegal(Mask, VT);
3988 }
3989
3990 if (!LegalMask)
3991 return SDValue();
3992
3993 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3994}
3995
3997 return nullptr;
3998}
3999
4001 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4002 bool PoisonOnly, unsigned Depth) const {
4003 assert(
4004 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4005 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4006 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4007 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4008 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4009 " is a target node!");
4010
4011 // If Op can't create undef/poison and none of its operands are undef/poison
4012 // then Op is never undef/poison.
4013 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4014 /*ConsiderFlags*/ true, Depth) &&
4015 all_of(Op->ops(), [&](SDValue V) {
4016 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4017 Depth + 1);
4018 });
4019}
4020
4022 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4023 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4024 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4025 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4026 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4027 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4028 "Should use canCreateUndefOrPoison if you don't know whether Op"
4029 " is a target node!");
4030 // Be conservative and return true.
4031 return true;
4032}
4033
4035 const APInt &DemandedElts,
4036 const SelectionDAG &DAG,
4037 bool SNaN,
4038 unsigned Depth) const {
4039 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4040 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4042 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4043 "Should use isKnownNeverNaN if you don't know whether Op"
4044 " is a target node!");
4045 return false;
4046}
4047
4049 const APInt &DemandedElts,
4050 APInt &UndefElts,
4051 const SelectionDAG &DAG,
4052 unsigned Depth) const {
4053 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4054 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4055 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4056 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4057 "Should use isSplatValue if you don't know whether Op"
4058 " is a target node!");
4059 return false;
4060}
4061
4062// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4063// work with truncating build vectors and vectors with elements of less than
4064// 8 bits.
4066 if (!N)
4067 return false;
4068
4069 unsigned EltWidth;
4070 APInt CVal;
4071 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4072 /*AllowTruncation=*/true)) {
4073 CVal = CN->getAPIntValue();
4074 EltWidth = N.getValueType().getScalarSizeInBits();
4075 } else
4076 return false;
4077
4078 // If this is a truncating splat, truncate the splat value.
4079 // Otherwise, we may fail to match the expected values below.
4080 if (EltWidth < CVal.getBitWidth())
4081 CVal = CVal.trunc(EltWidth);
4082
4083 switch (getBooleanContents(N.getValueType())) {
4085 return CVal[0];
4087 return CVal.isOne();
4089 return CVal.isAllOnes();
4090 }
4091
4092 llvm_unreachable("Invalid boolean contents");
4093}
4094
4096 if (!N)
4097 return false;
4098
4100 if (!CN) {
4102 if (!BV)
4103 return false;
4104
4105 // Only interested in constant splats, we don't care about undef
4106 // elements in identifying boolean constants and getConstantSplatNode
4107 // returns NULL if all ops are undef;
4108 CN = BV->getConstantSplatNode();
4109 if (!CN)
4110 return false;
4111 }
4112
4113 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4114 return !CN->getAPIntValue()[0];
4115
4116 return CN->isZero();
4117}
4118
4120 bool SExt) const {
4121 if (VT == MVT::i1)
4122 return N->isOne();
4123
4125 switch (Cnt) {
4127 // An extended value of 1 is always true, unless its original type is i1,
4128 // in which case it will be sign extended to -1.
4129 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4132 return N->isAllOnes() && SExt;
4133 }
4134 llvm_unreachable("Unexpected enumeration.");
4135}
4136
 4137/// This helper function of SimplifySetCC tries to optimize the comparison when
 4138/// either operand of the SetCC node is a bitwise-and instruction.
 /// Returns the folded setcc node, or an empty SDValue if no transform applies.
 /// Only [in]equality comparisons (SETEQ/SETNE) of integer types are handled.
 4139SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
 4140 ISD::CondCode Cond, const SDLoc &DL,
 4141 DAGCombinerInfo &DCI) const {
 // Canonicalize: if exactly one operand is an AND, put it on the LHS (N0).
 4142 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
 4143 std::swap(N0, N1);
 4144
 4145 SelectionDAG &DAG = DCI.DAG;
 4146 EVT OpVT = N0.getValueType();
 4147 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
 4148 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
 4149 return SDValue();
 4150
 4151 // (X & Y) != 0 --> zextOrTrunc(X & Y)
 4152 // iff everything but LSB is known zero:
 // NOTE(review): extraction gap — original lines 4154-4155 are missing here;
 // the '&&' condition continues on the dropped line(s). Recover the full
 // condition from upstream LLVM before attempting to compile this listing.
 4153 if (Cond == ISD::SETNE && isNullConstant(N1) &&
 4156 unsigned NumEltBits = OpVT.getScalarSizeInBits();
 4157 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
 4158 if (DAG.MaskedValueIsZero(N0, UpperBits))
 4159 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
 4160 }
 4161
 4162 // Try to eliminate a power-of-2 mask constant by converting to a signbit
 4163 // test in a narrow type that we can truncate to with no cost. Examples:
 4164 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
 4165 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
 4166 // TODO: This conservatively checks for type legality on the source and
 4167 // destination types. That may inhibit optimizations, but it also
 4168 // allows setcc->shift transforms that may be more beneficial.
 4169 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
 4170 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
 4171 isTypeLegal(OpVT) && N0.hasOneUse()) {
 // Narrow to the smallest integer type whose sign bit is the masked bit.
 4172 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
 4173 AndC->getAPIntValue().getActiveBits());
 4174 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
 4175 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
 4176 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
 // NOTE(review): extraction gap — original line 4178 (the final condition
 // code argument of this getSetCC call) is missing from this listing.
 4177 return DAG.getSetCC(DL, VT, Trunc, Zero,
 4179 }
 4180 }
 4181
 4182 // Match these patterns in any of their permutations:
 4183 // (X & Y) == Y
 4184 // (X & Y) != Y
 4185 SDValue X, Y;
 4186 if (N0.getOperand(0) == N1) {
 4187 X = N0.getOperand(1);
 4188 Y = N0.getOperand(0);
 4189 } else if (N0.getOperand(1) == N1) {
 4190 X = N0.getOperand(0);
 4191 Y = N0.getOperand(1);
 4192 } else {
 4193 return SDValue();
 4194 }
 4195
 4196 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
 4197 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
 4198 // its liable to create and infinite loop.
 4199 SDValue Zero = DAG.getConstant(0, DL, OpVT);
 // NOTE(review): extraction gap — original line 4201 (the second operand of
 // this '&&') is missing; verify the full condition against upstream.
 4200 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
 4202 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
 4203 // Note that where Y is variable and is known to have at most one bit set
 4204 // (for example, if it is Z & 1) we cannot do this; the expressions are not
 4205 // equivalent when Y == 0.
 4206 assert(OpVT.isInteger());
 // NOTE(review): extraction gap — original lines 4207 and 4209 are missing
 // here (the statement between the assert and this 'if', and the second
 // half of the 'if' condition). Recover from upstream before compiling.
 4208 if (DCI.isBeforeLegalizeOps() ||
 4210 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
 4211 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
 4212 // If the target supports an 'and-not' or 'and-complement' logic operation,
 4213 // try to use that to make a comparison operation more efficient.
 4214 // But don't do this transform if the mask is a single bit because there are
 4215 // more efficient ways to deal with that case (for example, 'bt' on x86 or
 4216 // 'rlwinm' on PPC).
 4217
 4218 // Bail out if the compare operand that we want to turn into a zero is
 4219 // already a zero (otherwise, infinite loop).
 4220 if (isNullConstant(Y))
 4221 return SDValue();
 4222
 4223 // Transform this into: ~X & Y == 0.
 4224 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
 4225 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
 4226 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
 4227 }
 4228
 4229 return SDValue();
 4230}
4231
4232/// This helper function of SimplifySetCC tries to optimize the comparison when
4233/// either operand of the SetCC node is a bitwise-or instruction.
4234/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4235SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4236 ISD::CondCode Cond, const SDLoc &DL,
4237 DAGCombinerInfo &DCI) const {
4238 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4239 std::swap(N0, N1);
4240
4241 SelectionDAG &DAG = DCI.DAG;
4242 EVT OpVT = N0.getValueType();
4243 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4244 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4245 return SDValue();
4246
4247 // (X | Y) == Y
4248 // (X | Y) != Y
4249 SDValue X;
4250 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4251 // If the target supports an 'and-not' or 'and-complement' logic operation,
4252 // try to use that to make a comparison operation more efficient.
4253
4254 // Bail out if the compare operand that we want to turn into a zero is
4255 // already a zero (otherwise, infinite loop).
4256 if (isNullConstant(N1))
4257 return SDValue();
4258
4259 // Transform this into: X & ~Y ==/!= 0.
4260 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4261 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4262 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4263 }
4264
4265 return SDValue();
4266}
4267
 4268/// There are multiple IR patterns that could be checking whether certain
 4269/// truncation of a signed number would be lossy or not. The pattern which is
 4270/// best at IR level, may not lower optimally. Thus, we want to unfold it.
 4271/// We are looking for the following pattern: (KeptBits is a constant)
 4272/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
 4273/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
 4274/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
 4275/// We will unfold it into the natural trunc+sext pattern:
 4276/// ((%x << C) a>> C) dstcond %x
 4277/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
 /// Returns the rewritten setcc, or an empty SDValue if the pattern does not
 /// match or the target declines the transform.
 4278SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
 4279 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
 4280 const SDLoc &DL) const {
 4281 // We must be comparing with a constant.
 4282 ConstantSDNode *C1;
 4283 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
 4284 return SDValue();
 4285
 4286 // N0 should be: add %x, (1 << (KeptBits-1))
 4287 if (N0->getOpcode() != ISD::ADD)
 4288 return SDValue();
 4289
 4290 // And we must be 'add'ing a constant.
 4291 ConstantSDNode *C01;
 4292 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
 4293 return SDValue();
 4294
 4295 SDValue X = N0->getOperand(0);
 4296 EVT XVT = X.getValueType();
 4297
 4298 // Validate constants ...
 4299
 4300 APInt I1 = C1->getAPIntValue();
 4301
 // Map the unsigned range check onto an eq/ne check; ule/ugt need the setcc
 // constant bumped by one so that both compare against (1 << KeptBits).
 4302 ISD::CondCode NewCond;
 4303 if (Cond == ISD::CondCode::SETULT) {
 4304 NewCond = ISD::CondCode::SETEQ;
 4305 } else if (Cond == ISD::CondCode::SETULE) {
 4306 NewCond = ISD::CondCode::SETEQ;
 4307 // But need to 'canonicalize' the constant.
 4308 I1 += 1;
 4309 } else if (Cond == ISD::CondCode::SETUGT) {
 4310 NewCond = ISD::CondCode::SETNE;
 4311 // But need to 'canonicalize' the constant.
 4312 I1 += 1;
 4313 } else if (Cond == ISD::CondCode::SETUGE) {
 4314 NewCond = ISD::CondCode::SETNE;
 4315 } else
 4316 return SDValue();
 4317
 4318 APInt I01 = C01->getAPIntValue();
 4319
 4320 auto checkConstants = [&I1, &I01]() -> bool {
 4321 // Both of them must be power-of-two, and the constant from setcc is bigger.
 4322 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
 4323 };
 4324
 4325 if (checkConstants()) {
 4326 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
 4327 } else {
 4328 // What if we invert constants? (and the target predicate)
 4329 I1.negate();
 4330 I01.negate();
 4331 assert(XVT.isInteger());
 4332 NewCond = getSetCCInverse(NewCond, XVT);
 4333 if (!checkConstants())
 4334 return SDValue();
 4335 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
 4336 }
 4337
 4338 // They are power-of-two, so which bit is set?
 4339 const unsigned KeptBits = I1.logBase2();
 4340 const unsigned KeptBitsMinusOne = I01.logBase2();
 4341
 4342 // Magic!
 4343 if (KeptBits != (KeptBitsMinusOne + 1))
 4344 return SDValue();
 4345 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
 4346
 4347 // We don't want to do this in every single case.
 4348 SelectionDAG &DAG = DCI.DAG;
 4349 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
 4350 return SDValue();
 4351
 4352 // Unfold into: sext_inreg(%x) cond %x
 4353 // Where 'cond' will be either 'eq' or 'ne'.
 // NOTE(review): extraction gap — original line 4355 (the opcode/operand
 // arguments of this getNode call) is missing from this listing; recover it
 // from upstream LLVM before compiling.
 4354 SDValue SExtInReg = DAG.getNode(
 4356 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
 4357 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
 4358}
4359
 4360// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
 // Hoist the constant out of the shift by applying the opposite logical shift
 // to the other 'and' operand; only valid for eq/ne comparisons against zero.
 // Returns the rewritten setcc, or an empty SDValue if the pattern or target
 // hooks do not allow the transform.
 4361SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
 4362 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
 // NOTE(review): extraction gap — original line 4364 (the first half of the
 // assert whose message string follows) is missing from this listing.
 4363 DAGCombinerInfo &DCI, const SDLoc &DL) const {
 4365 "Should be a comparison with 0.");
 4366 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 4367 "Valid only for [in]equality comparisons.");
 4368
 4369 unsigned NewShiftOpcode;
 4370 SDValue X, C, Y;
 4371
 4372 SelectionDAG &DAG = DCI.DAG;
 4373
 4374 // Look for '(C l>>/<< Y)'.
 4375 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
 4376 // The shift should be one-use.
 4377 if (!V.hasOneUse())
 4378 return false;
 // The replacement shift on X is the opposite logical shift direction.
 4379 unsigned OldShiftOpcode = V.getOpcode();
 4380 switch (OldShiftOpcode) {
 4381 case ISD::SHL:
 4382 NewShiftOpcode = ISD::SRL;
 4383 break;
 4384 case ISD::SRL:
 4385 NewShiftOpcode = ISD::SHL;
 4386 break;
 4387 default:
 4388 return false; // must be a logical shift.
 4389 }
 4390 // We should be shifting a constant.
 4391 // FIXME: best to use isConstantOrConstantVector().
 4392 C = V.getOperand(0);
 4393 ConstantSDNode *CC =
 4394 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
 4395 if (!CC)
 4396 return false;
 4397 Y = V.getOperand(1);
 4398
 4399 ConstantSDNode *XC =
 // NOTE(review): extraction gap — original line 4401 (the start of the
 // return statement whose argument list continues below) is missing from
 // this listing; recover it from upstream LLVM before compiling.
 4400 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
 4402 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
 4403 };
 4404
 4405 // LHS of comparison should be an one-use 'and'.
 4406 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
 4407 return SDValue();
 4408
 4409 X = N0.getOperand(0);
 4410 SDValue Mask = N0.getOperand(1);
 4411
 4412 // 'and' is commutative!
 4413 if (!Match(Mask)) {
 4414 std::swap(X, Mask);
 4415 if (!Match(Mask))
 4416 return SDValue();
 4417 }
 4418
 4419 EVT VT = X.getValueType();
 4420
 4421 // Produce:
 4422 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
 4423 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
 4424 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
 4425 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
 4426 return T2;
 4427}
4428
4429/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4430/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4431/// handle the commuted versions of these patterns.
4432SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4433 ISD::CondCode Cond, const SDLoc &DL,
4434 DAGCombinerInfo &DCI) const {
4435 unsigned BOpcode = N0.getOpcode();
4436 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4437 "Unexpected binop");
4438 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4439
4440 // (X + Y) == X --> Y == 0
4441 // (X - Y) == X --> Y == 0
4442 // (X ^ Y) == X --> Y == 0
4443 SelectionDAG &DAG = DCI.DAG;
4444 EVT OpVT = N0.getValueType();
4445 SDValue X = N0.getOperand(0);
4446 SDValue Y = N0.getOperand(1);
4447 if (X == N1)
4448 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4449
4450 if (Y != N1)
4451 return SDValue();
4452
4453 // (X + Y) == Y --> X == 0
4454 // (X ^ Y) == Y --> X == 0
4455 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4456 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4457
4458 // The shift would not be valid if the operands are boolean (i1).
4459 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4460 return SDValue();
4461
4462 // (X - Y) == Y --> X == Y << 1
4463 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4464 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4465 if (!DCI.isCalledByLegalizer())
4466 DCI.AddToWorklist(YShl1.getNode());
4467 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4468}
4469
4471 SDValue N0, const APInt &C1,
4472 ISD::CondCode Cond, const SDLoc &dl,
4473 SelectionDAG &DAG) {
4474 // Look through truncs that don't change the value of a ctpop.
4475 // FIXME: Add vector support? Need to be careful with setcc result type below.
4476 SDValue CTPOP = N0;
4477 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4479 CTPOP = N0.getOperand(0);
4480
4481 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4482 return SDValue();
4483
4484 EVT CTVT = CTPOP.getValueType();
4485 SDValue CTOp = CTPOP.getOperand(0);
4486
4487 // Expand a power-of-2-or-zero comparison based on ctpop:
4488 // (ctpop x) u< 2 -> (x & x-1) == 0
4489 // (ctpop x) u> 1 -> (x & x-1) != 0
4490 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4491 // Keep the CTPOP if it is a cheap vector op.
4492 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4493 return SDValue();
4494
4495 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4496 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4497 return SDValue();
4498 if (C1 == 0 && (Cond == ISD::SETULT))
4499 return SDValue(); // This is handled elsewhere.
4500
4501 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4502
4503 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4504 SDValue Result = CTOp;
4505 for (unsigned i = 0; i < Passes; i++) {
4506 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4507 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4508 }
4510 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4511 }
4512
4513 // Expand a power-of-2 comparison based on ctpop
4514 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4515 // Keep the CTPOP if it is cheap.
4516 if (TLI.isCtpopFast(CTVT))
4517 return SDValue();
4518
4519 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4520 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4521 assert(CTVT.isInteger());
4522 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4523
4524 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4525 // check before emitting a potentially unnecessary op.
4526 if (DAG.isKnownNeverZero(CTOp)) {
4527 // (ctpop x) == 1 --> (x & x-1) == 0
4528 // (ctpop x) != 1 --> (x & x-1) != 0
4529 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4530 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4531 return RHS;
4532 }
4533
4534 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4535 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4536 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4538 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4539 }
4540
4541 return SDValue();
4542}
4543
4545 ISD::CondCode Cond, const SDLoc &dl,
4546 SelectionDAG &DAG) {
4547 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4548 return SDValue();
4549
4550 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4551 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4552 return SDValue();
4553
4554 auto getRotateSource = [](SDValue X) {
4555 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4556 return X.getOperand(0);
4557 return SDValue();
4558 };
4559
4560 // Peek through a rotated value compared against 0 or -1:
4561 // (rot X, Y) == 0/-1 --> X == 0/-1
4562 // (rot X, Y) != 0/-1 --> X != 0/-1
4563 if (SDValue R = getRotateSource(N0))
4564 return DAG.getSetCC(dl, VT, R, N1, Cond);
4565
4566 // Peek through an 'or' of a rotated value compared against 0:
4567 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4568 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4569 //
4570 // TODO: Add the 'and' with -1 sibling.
4571 // TODO: Recurse through a series of 'or' ops to find the rotate.
4572 EVT OpVT = N0.getValueType();
4573 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4574 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4575 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4576 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4577 }
4578 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4579 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4580 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4581 }
4582 }
4583
4584 return SDValue();
4585}
4586
4588 ISD::CondCode Cond, const SDLoc &dl,
4589 SelectionDAG &DAG) {
4590 // If we are testing for all-bits-clear, we might be able to do that with
4591 // less shifting since bit-order does not matter.
4592 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4593 return SDValue();
4594
4595 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4596 if (!C1 || !C1->isZero())
4597 return SDValue();
4598
4599 if (!N0.hasOneUse() ||
4600 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4601 return SDValue();
4602
4603 unsigned BitWidth = N0.getScalarValueSizeInBits();
4604 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4605 if (!ShAmtC)
4606 return SDValue();
4607
4608 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4609 if (ShAmt == 0)
4610 return SDValue();
4611
4612 // Canonicalize fshr as fshl to reduce pattern-matching.
4613 if (N0.getOpcode() == ISD::FSHR)
4614 ShAmt = BitWidth - ShAmt;
4615
4616 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4617 SDValue X, Y;
4618 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4619 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4620 return false;
4621 if (Or.getOperand(0) == Other) {
4622 X = Or.getOperand(0);
4623 Y = Or.getOperand(1);
4624 return true;
4625 }
4626 if (Or.getOperand(1) == Other) {
4627 X = Or.getOperand(1);
4628 Y = Or.getOperand(0);
4629 return true;
4630 }
4631 return false;
4632 };
4633
4634 EVT OpVT = N0.getValueType();
4635 EVT ShAmtVT = N0.getOperand(2).getValueType();
4636 SDValue F0 = N0.getOperand(0);
4637 SDValue F1 = N0.getOperand(1);
4638 if (matchOr(F0, F1)) {
4639 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4640 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4641 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4642 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4643 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4644 }
4645 if (matchOr(F1, F0)) {
4646 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4647 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4648 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4649 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4650 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4651 }
4652
4653 return SDValue();
4654}
4655
4656/// Try to simplify a setcc built with the specified operands and cc. If it is
4657/// unable to simplify it, return a null SDValue.
4659 ISD::CondCode Cond, bool foldBooleans,
4660 DAGCombinerInfo &DCI,
4661 const SDLoc &dl) const {
4662 SelectionDAG &DAG = DCI.DAG;
4663 const DataLayout &Layout = DAG.getDataLayout();
4664 EVT OpVT = N0.getValueType();
4665 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4666
4667 // Constant fold or commute setcc.
4668 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4669 return Fold;
4670
4671 bool N0ConstOrSplat =
4672 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4673 bool N1ConstOrSplat =
4674 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4675
4676 // Canonicalize toward having the constant on the RHS.
4677 // TODO: Handle non-splat vector constants. All undef causes trouble.
4678 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4679 // infinite loop here when we encounter one.
4681 if (N0ConstOrSplat && !N1ConstOrSplat &&
4682 (DCI.isBeforeLegalizeOps() ||
4683 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4684 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4685
4686 // If we have a subtract with the same 2 non-constant operands as this setcc
4687 // -- but in reverse order -- then try to commute the operands of this setcc
4688 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4689 // instruction on some targets.
4690 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4691 (DCI.isBeforeLegalizeOps() ||
4692 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4693 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4694 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4695 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4696
4697 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4698 return V;
4699
4700 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4701 return V;
4702
4703 if (auto *N1C = isConstOrConstSplat(N1)) {
4704 const APInt &C1 = N1C->getAPIntValue();
4705
4706 // Optimize some CTPOP cases.
4707 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4708 return V;
4709
4710 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4711 // X * Y == 0 --> (X == 0) || (Y == 0)
4712 // X * Y != 0 --> (X != 0) && (Y != 0)
4713 // TODO: This bails out if minsize is set, but if the target doesn't have a
4714 // single instruction multiply for this type, it would likely be
4715 // smaller to decompose.
4716 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4717 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4718 (N0->getFlags().hasNoUnsignedWrap() ||
4719 N0->getFlags().hasNoSignedWrap()) &&
4720 !Attr.hasFnAttr(Attribute::MinSize)) {
4721 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4722 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4723 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4724 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4725 }
4726
4727 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4728 // equality comparison, then we're just comparing whether X itself is
4729 // zero.
4730 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4731 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4733 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4734 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4735 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4736 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4737 // (srl (ctlz x), 5) == 0 -> X != 0
4738 // (srl (ctlz x), 5) != 1 -> X != 0
4739 Cond = ISD::SETNE;
4740 } else {
4741 // (srl (ctlz x), 5) != 0 -> X == 0
4742 // (srl (ctlz x), 5) == 1 -> X == 0
4743 Cond = ISD::SETEQ;
4744 }
4745 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4746 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4747 Cond);
4748 }
4749 }
4750 }
4751 }
4752
4753 // FIXME: Support vectors.
4754 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4755 const APInt &C1 = N1C->getAPIntValue();
4756
4757 // (zext x) == C --> x == (trunc C)
4758 // (sext x) == C --> x == (trunc C)
4759 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4760 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4761 unsigned MinBits = N0.getValueSizeInBits();
4762 SDValue PreExt;
4763 bool Signed = false;
4764 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4765 // ZExt
4766 MinBits = N0->getOperand(0).getValueSizeInBits();
4767 PreExt = N0->getOperand(0);
4768 } else if (N0->getOpcode() == ISD::AND) {
4769 // DAGCombine turns costly ZExts into ANDs
4770 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4771 if ((C->getAPIntValue()+1).isPowerOf2()) {
4772 MinBits = C->getAPIntValue().countr_one();
4773 PreExt = N0->getOperand(0);
4774 }
4775 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4776 // SExt
4777 MinBits = N0->getOperand(0).getValueSizeInBits();
4778 PreExt = N0->getOperand(0);
4779 Signed = true;
4780 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4781 // ZEXTLOAD / SEXTLOAD
4782 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4783 MinBits = LN0->getMemoryVT().getSizeInBits();
4784 PreExt = N0;
4785 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4786 Signed = true;
4787 MinBits = LN0->getMemoryVT().getSizeInBits();
4788 PreExt = N0;
4789 }
4790 }
4791
4792 // Figure out how many bits we need to preserve this constant.
4793 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4794
4795 // Make sure we're not losing bits from the constant.
4796 if (MinBits > 0 &&
4797 MinBits < C1.getBitWidth() &&
4798 MinBits >= ReqdBits) {
4799 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4800 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4801 // Will get folded away.
4802 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4803 if (MinBits == 1 && C1 == 1)
4804 // Invert the condition.
4805 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4807 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4808 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4809 }
4810
4811 // If truncating the setcc operands is not desirable, we can still
4812 // simplify the expression in some cases:
4813 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4814 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4815 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4816 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4817 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4818 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4819 SDValue TopSetCC = N0->getOperand(0);
4820 unsigned N0Opc = N0->getOpcode();
4821 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4822 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4823 TopSetCC.getOpcode() == ISD::SETCC &&
4824 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4825 (isConstFalseVal(N1) ||
4826 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4827
4828 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4829 (!N1C->isZero() && Cond == ISD::SETNE);
4830
4831 if (!Inverse)
4832 return TopSetCC;
4833
4835 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4836 TopSetCC.getOperand(0).getValueType());
4837 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4838 TopSetCC.getOperand(1),
4839 InvCond);
4840 }
4841 }
4842 }
4843
4844 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4845 // equality or unsigned, and all 1 bits of the const are in the same
4846 // partial word, see if we can shorten the load.
4847 if (DCI.isBeforeLegalize() &&
4849 N0.getOpcode() == ISD::AND && C1 == 0 &&
4850 N0.getNode()->hasOneUse() &&
4851 isa<LoadSDNode>(N0.getOperand(0)) &&
4852 N0.getOperand(0).getNode()->hasOneUse() &&
4854 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4855 APInt bestMask;
4856 unsigned bestWidth = 0, bestOffset = 0;
4857 if (Lod->isSimple() && Lod->isUnindexed() &&
4858 (Lod->getMemoryVT().isByteSized() ||
4859 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4860 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4861 unsigned origWidth = N0.getValueSizeInBits();
4862 unsigned maskWidth = origWidth;
4863 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4864 // 8 bits, but have to be careful...
4865 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4866 origWidth = Lod->getMemoryVT().getSizeInBits();
4867 const APInt &Mask = N0.getConstantOperandAPInt(1);
4868 // Only consider power-of-2 widths (and at least one byte) as candiates
4869 // for the narrowed load.
4870 for (unsigned width = 8; width < origWidth; width *= 2) {
4871 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4872 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4873 // Avoid accessing any padding here for now (we could use memWidth
4874 // instead of origWidth here otherwise).
4875 unsigned maxOffset = origWidth - width;
4876 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4877 if (Mask.isSubsetOf(newMask)) {
4878 unsigned ptrOffset =
4879 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4880 unsigned IsFast = 0;
4881 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4882 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4884 ptrOffset / 8) &&
4886 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4887 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4888 IsFast) {
4889 bestOffset = ptrOffset / 8;
4890 bestMask = Mask.lshr(offset);
4891 bestWidth = width;
4892 break;
4893 }
4894 }
4895 newMask <<= 8;
4896 }
4897 if (bestWidth)
4898 break;
4899 }
4900 }
4901 if (bestWidth) {
4902 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4903 SDValue Ptr = Lod->getBasePtr();
4904 if (bestOffset != 0)
4905 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4906 SDValue NewLoad =
4907 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4908 Lod->getPointerInfo().getWithOffset(bestOffset),
4909 Lod->getBaseAlign());
4910 SDValue And =
4911 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4912 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4913 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4914 }
4915 }
4916
4917 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4918 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4919 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4920
4921 // If the comparison constant has bits in the upper part, the
4922 // zero-extended value could never match.
4924 C1.getBitWidth() - InSize))) {
4925 switch (Cond) {
4926 case ISD::SETUGT:
4927 case ISD::SETUGE:
4928 case ISD::SETEQ:
4929 return DAG.getConstant(0, dl, VT);
4930 case ISD::SETULT:
4931 case ISD::SETULE:
4932 case ISD::SETNE:
4933 return DAG.getConstant(1, dl, VT);
4934 case ISD::SETGT:
4935 case ISD::SETGE:
4936 // True if the sign bit of C1 is set.
4937 return DAG.getConstant(C1.isNegative(), dl, VT);
4938 case ISD::SETLT:
4939 case ISD::SETLE:
4940 // True if the sign bit of C1 isn't set.
4941 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4942 default:
4943 break;
4944 }
4945 }
4946
4947 // Otherwise, we can perform the comparison with the low bits.
4948 switch (Cond) {
4949 case ISD::SETEQ:
4950 case ISD::SETNE:
4951 case ISD::SETUGT:
4952 case ISD::SETUGE:
4953 case ISD::SETULT:
4954 case ISD::SETULE: {
4955 EVT newVT = N0.getOperand(0).getValueType();
4956 // FIXME: Should use isNarrowingProfitable.
4957 if (DCI.isBeforeLegalizeOps() ||
4958 (isOperationLegal(ISD::SETCC, newVT) &&
4959 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4961 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4962 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4963
4964 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4965 NewConst, Cond);
4966 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4967 }
4968 break;
4969 }
4970 default:
4971 break; // todo, be more careful with signed comparisons
4972 }
4973 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4974 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4976 OpVT)) {
4977 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4978 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4979 EVT ExtDstTy = N0.getValueType();
4980 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4981
4982 // If the constant doesn't fit into the number of bits for the source of
4983 // the sign extension, it is impossible for both sides to be equal.
4984 if (C1.getSignificantBits() > ExtSrcTyBits)
4985 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4986
4987 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4988 ExtDstTy != ExtSrcTy && "Unexpected types!");
4989 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4990 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4991 DAG.getConstant(Imm, dl, ExtDstTy));
4992 if (!DCI.isCalledByLegalizer())
4993 DCI.AddToWorklist(ZextOp.getNode());
4994 // Otherwise, make this a use of a zext.
4995 return DAG.getSetCC(dl, VT, ZextOp,
4996 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4997 } else if ((N1C->isZero() || N1C->isOne()) &&
4998 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4999 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5000 // excluded as they are handled below whilst checking for foldBooleans.
5001 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5002 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5003 (N0.getValueType() == MVT::i1 ||
5007 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5008 if (TrueWhenTrue)
5009 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5010 // Invert the condition.
5011 if (N0.getOpcode() == ISD::SETCC) {
5014 if (DCI.isBeforeLegalizeOps() ||
5016 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5017 }
5018 }
5019
5020 if ((N0.getOpcode() == ISD::XOR ||
5021 (N0.getOpcode() == ISD::AND &&
5022 N0.getOperand(0).getOpcode() == ISD::XOR &&
5023 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5024 isOneConstant(N0.getOperand(1))) {
5025 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5026 // can only do this if the top bits are known zero.
5027 unsigned BitWidth = N0.getValueSizeInBits();
5028 if (DAG.MaskedValueIsZero(N0,
5030 BitWidth-1))) {
5031 // Okay, get the un-inverted input value.
5032 SDValue Val;
5033 if (N0.getOpcode() == ISD::XOR) {
5034 Val = N0.getOperand(0);
5035 } else {
5036 assert(N0.getOpcode() == ISD::AND &&
5037 N0.getOperand(0).getOpcode() == ISD::XOR);
5038 // ((X^1)&1)^1 -> X & 1
5039 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5040 N0.getOperand(0).getOperand(0),
5041 N0.getOperand(1));
5042 }
5043
5044 return DAG.getSetCC(dl, VT, Val, N1,
5046 }
5047 } else if (N1C->isOne()) {
5048 SDValue Op0 = N0;
5049 if (Op0.getOpcode() == ISD::TRUNCATE)
5050 Op0 = Op0.getOperand(0);
5051
5052 if ((Op0.getOpcode() == ISD::XOR) &&
5053 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5054 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5055 SDValue XorLHS = Op0.getOperand(0);
5056 SDValue XorRHS = Op0.getOperand(1);
5057 // Ensure that the input setccs return an i1 type or 0/1 value.
5058 if (Op0.getValueType() == MVT::i1 ||
5063 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5065 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5066 }
5067 }
5068 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5069 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5070 if (Op0.getValueType().bitsGT(VT))
5071 Op0 = DAG.getNode(ISD::AND, dl, VT,
5072 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5073 DAG.getConstant(1, dl, VT));
5074 else if (Op0.getValueType().bitsLT(VT))
5075 Op0 = DAG.getNode(ISD::AND, dl, VT,
5076 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5077 DAG.getConstant(1, dl, VT));
5078
5079 return DAG.getSetCC(dl, VT, Op0,
5080 DAG.getConstant(0, dl, Op0.getValueType()),
5082 }
5083 if (Op0.getOpcode() == ISD::AssertZext &&
5084 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5085 return DAG.getSetCC(dl, VT, Op0,
5086 DAG.getConstant(0, dl, Op0.getValueType()),
5088 }
5089 }
5090
5091 // Given:
5092 // icmp eq/ne (urem %x, %y), 0
5093 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5094 // icmp eq/ne %x, 0
5095 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5096 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5097 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5098 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5099 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5100 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5101 }
5102
5103 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5104 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5105 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5107 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5108 N1C->isAllOnes()) {
5109 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5110 DAG.getConstant(0, dl, OpVT),
5112 }
5113
5114 // fold (setcc (trunc x) c) -> (setcc x c)
5115 if (N0.getOpcode() == ISD::TRUNCATE &&
5117 (N0->getFlags().hasNoSignedWrap() &&
5120 EVT NewVT = N0.getOperand(0).getValueType();
5121 SDValue NewConst = DAG.getConstant(
5123 ? C1.sext(NewVT.getSizeInBits())
5124 : C1.zext(NewVT.getSizeInBits()),
5125 dl, NewVT);
5126 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5127 }
5128
5129 if (SDValue V =
5130 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5131 return V;
5132 }
5133
5134 // These simplifications apply to splat vectors as well.
5135 // TODO: Handle more splat vector cases.
5136 if (auto *N1C = isConstOrConstSplat(N1)) {
5137 const APInt &C1 = N1C->getAPIntValue();
5138
5139 APInt MinVal, MaxVal;
5140 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5142 MinVal = APInt::getSignedMinValue(OperandBitSize);
5143 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5144 } else {
5145 MinVal = APInt::getMinValue(OperandBitSize);
5146 MaxVal = APInt::getMaxValue(OperandBitSize);
5147 }
5148
5149 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5150 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5151 // X >= MIN --> true
5152 if (C1 == MinVal)
5153 return DAG.getBoolConstant(true, dl, VT, OpVT);
5154
5155 if (!VT.isVector()) { // TODO: Support this for vectors.
5156 // X >= C0 --> X > (C0 - 1)
5157 APInt C = C1 - 1;
5159 if ((DCI.isBeforeLegalizeOps() ||
5160 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5161 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5162 isLegalICmpImmediate(C.getSExtValue())))) {
5163 return DAG.getSetCC(dl, VT, N0,
5164 DAG.getConstant(C, dl, N1.getValueType()),
5165 NewCC);
5166 }
5167 }
5168 }
5169
5170 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5171 // X <= MAX --> true
5172 if (C1 == MaxVal)
5173 return DAG.getBoolConstant(true, dl, VT, OpVT);
5174
5175 // X <= C0 --> X < (C0 + 1)
5176 if (!VT.isVector()) { // TODO: Support this for vectors.
5177 APInt C = C1 + 1;
5179 if ((DCI.isBeforeLegalizeOps() ||
5180 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5181 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5182 isLegalICmpImmediate(C.getSExtValue())))) {
5183 return DAG.getSetCC(dl, VT, N0,
5184 DAG.getConstant(C, dl, N1.getValueType()),
5185 NewCC);
5186 }
5187 }
5188 }
5189
5190 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5191 if (C1 == MinVal)
5192 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5193
5194 // TODO: Support this for vectors after legalize ops.
5195 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5196 // Canonicalize setlt X, Max --> setne X, Max
5197 if (C1 == MaxVal)
5198 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5199
5200 // If we have setult X, 1, turn it into seteq X, 0
5201 if (C1 == MinVal+1)
5202 return DAG.getSetCC(dl, VT, N0,
5203 DAG.getConstant(MinVal, dl, N0.getValueType()),
5204 ISD::SETEQ);
5205 }
5206 }
5207
5208 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5209 if (C1 == MaxVal)
5210 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5211
5212 // TODO: Support this for vectors after legalize ops.
5213 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5214 // Canonicalize setgt X, Min --> setne X, Min
5215 if (C1 == MinVal)
5216 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5217
5218 // If we have setugt X, Max-1, turn it into seteq X, Max
5219 if (C1 == MaxVal-1)
5220 return DAG.getSetCC(dl, VT, N0,
5221 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5222 ISD::SETEQ);
5223 }
5224 }
5225
5226 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5227 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5228 if (C1.isZero())
5229 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5230 VT, N0, N1, Cond, DCI, dl))
5231 return CC;
5232
5233 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5234 // For example, when high 32-bits of i64 X are known clear:
5235 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5236 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5237 bool CmpZero = N1C->isZero();
5238 bool CmpNegOne = N1C->isAllOnes();
5239 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5240 // Match or(lo,shl(hi,bw/2)) pattern.
5241 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5242 unsigned EltBits = V.getScalarValueSizeInBits();
5243 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5244 return false;
5245 SDValue LHS = V.getOperand(0);
5246 SDValue RHS = V.getOperand(1);
5247 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5248 // Unshifted element must have zero upperbits.
5249 if (RHS.getOpcode() == ISD::SHL &&
5250 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5251 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5252 DAG.MaskedValueIsZero(LHS, HiBits)) {
5253 Lo = LHS;
5254 Hi = RHS.getOperand(0);
5255 return true;
5256 }
5257 if (LHS.getOpcode() == ISD::SHL &&
5258 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5259 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5260 DAG.MaskedValueIsZero(RHS, HiBits)) {
5261 Lo = RHS;
5262 Hi = LHS.getOperand(0);
5263 return true;
5264 }
5265 return false;
5266 };
5267
5268 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5269 unsigned EltBits = N0.getScalarValueSizeInBits();
5270 unsigned HalfBits = EltBits / 2;
5271 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5272 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5273 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5274 SDValue NewN0 =
5275 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5276 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5277 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5278 };
5279
5280 SDValue Lo, Hi;
5281 if (IsConcat(N0, Lo, Hi))
5282 return MergeConcat(Lo, Hi);
5283
5284 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5285 SDValue Lo0, Lo1, Hi0, Hi1;
5286 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5287 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5288 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5289 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5290 }
5291 }
5292 }
5293 }
5294
5295 // If we have "setcc X, C0", check to see if we can shrink the immediate
5296 // by changing cc.
5297 // TODO: Support this for vectors after legalize ops.
5298 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5299 // SETUGT X, SINTMAX -> SETLT X, 0
5300 // SETUGE X, SINTMIN -> SETLT X, 0
5301 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5302 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5303 return DAG.getSetCC(dl, VT, N0,
5304 DAG.getConstant(0, dl, N1.getValueType()),
5305 ISD::SETLT);
5306
5307 // SETULT X, SINTMIN -> SETGT X, -1
5308 // SETULE X, SINTMAX -> SETGT X, -1
5309 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5310 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5311 return DAG.getSetCC(dl, VT, N0,
5312 DAG.getAllOnesConstant(dl, N1.getValueType()),
5313 ISD::SETGT);
5314 }
5315 }
5316
5317 // Back to non-vector simplifications.
5318 // TODO: Can we do these for vector splats?
5319 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5320 const APInt &C1 = N1C->getAPIntValue();
5321 EVT ShValTy = N0.getValueType();
5322
5323 // Fold bit comparisons when we can. This will result in an
5324 // incorrect value when boolean false is negative one, unless
5325 // the bitsize is 1 in which case the false value is the same
5326 // in practice regardless of the representation.
5327 if ((VT.getSizeInBits() == 1 ||
5329 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5330 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5331 N0.getOpcode() == ISD::AND) {
5332 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5333 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5334 // Perform the xform if the AND RHS is a single bit.
5335 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5336 if (AndRHS->getAPIntValue().isPowerOf2() &&
5337 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5338 return DAG.getNode(
5339 ISD::TRUNCATE, dl, VT,
5340 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5341 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5342 }
5343 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5344 // (X & 8) == 8 --> (X & 8) >> 3
5345 // Perform the xform if C1 is a single bit.
5346 unsigned ShCt = C1.logBase2();
5347 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5348 return DAG.getNode(
5349 ISD::TRUNCATE, dl, VT,
5350 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5351 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5352 }
5353 }
5354 }
5355 }
5356
5357 if (C1.getSignificantBits() <= 64 &&
5359 // (X & -256) == 256 -> (X >> 8) == 1
5360 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5361 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5362 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5363 const APInt &AndRHSC = AndRHS->getAPIntValue();
5364 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5365 unsigned ShiftBits = AndRHSC.countr_zero();
5366 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5367 // If using an unsigned shift doesn't yield a legal compare
5368 // immediate, try using sra instead.
5369 APInt NewC = C1.lshr(ShiftBits);
5370 if (NewC.getSignificantBits() <= 64 &&
5372 APInt SignedC = C1.ashr(ShiftBits);
5373 if (SignedC.getSignificantBits() <= 64 &&
5375 SDValue Shift = DAG.getNode(
5376 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5377 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5378 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5379 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5380 }
5381 }
5382 SDValue Shift = DAG.getNode(
5383 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5384 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5385 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5386 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5387 }
5388 }
5389 }
5390 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5391 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5392 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5393 // X < 0x100000000 -> (X >> 32) < 1
5394 // X >= 0x100000000 -> (X >> 32) >= 1
5395 // X <= 0x0ffffffff -> (X >> 32) < 1
5396 // X > 0x0ffffffff -> (X >> 32) >= 1
5397 unsigned ShiftBits;
5398 APInt NewC = C1;
5399 ISD::CondCode NewCond = Cond;
5400 if (AdjOne) {
5401 ShiftBits = C1.countr_one();
5402 NewC = NewC + 1;
5403 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5404 } else {
5405 ShiftBits = C1.countr_zero();
5406 }
5407 NewC.lshrInPlace(ShiftBits);
5408 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5410 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5411 SDValue Shift =
5412 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5413 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5414 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5415 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5416 }
5417 }
5418 }
5419 }
5420
5422 auto *CFP = cast<ConstantFPSDNode>(N1);
5423 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5424
5425 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5426 // constant if knowing that the operand is non-nan is enough. We prefer to
5427 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5428 // materialize 0.0.
5429 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5430 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5431
5432 // setcc (fneg x), C -> setcc swap(pred) x, -C
5433 if (N0.getOpcode() == ISD::FNEG) {
5435 if (DCI.isBeforeLegalizeOps() ||
5436 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5437 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5438 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5439 }
5440 }
5441
5442 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5444 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5445 bool IsFabs = N0.getOpcode() == ISD::FABS;
5446 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5447 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5448 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5449 : (IsFabs ? fcInf : fcPosInf);
5450 if (Cond == ISD::SETUEQ)
5451 Flag |= fcNan;
5452 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5453 DAG.getTargetConstant(Flag, dl, MVT::i32));
5454 }
5455 }
5456
5457 // If the condition is not legal, see if we can find an equivalent one
5458 // which is legal.
5460 // If the comparison was an awkward floating-point == or != and one of
5461 // the comparison operands is infinity or negative infinity, convert the
5462 // condition to a less-awkward <= or >=.
5463 if (CFP->getValueAPF().isInfinity()) {
5464 bool IsNegInf = CFP->getValueAPF().isNegative();
5466 switch (Cond) {
5467 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5468 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5469 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5470 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5471 default: break;
5472 }
5473 if (NewCond != ISD::SETCC_INVALID &&
5474 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5475 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5476 }
5477 }
5478 }
5479
5480 if (N0 == N1) {
5481 // The sext(setcc()) => setcc() optimization relies on the appropriate
5482 // constant being emitted.
5483 assert(!N0.getValueType().isInteger() &&
5484 "Integer types should be handled by FoldSetCC");
5485
5486 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5487 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5488 if (UOF == 2) // FP operators that are undefined on NaNs.
5489 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5490 if (UOF == unsigned(EqTrue))
5491 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5492 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5493 // if it is not already.
5494 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5495 if (NewCond != Cond &&
5496 (DCI.isBeforeLegalizeOps() ||
5497 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5498 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5499 }
5500
5501 // ~X > ~Y --> Y > X
5502 // ~X < ~Y --> Y < X
5503 // ~X < C --> X > ~C
5504 // ~X > C --> X < ~C
5505 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5506 N0.getValueType().isInteger()) {
5507 if (isBitwiseNot(N0)) {
5508 if (isBitwiseNot(N1))
5509 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5510
5513 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5514 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5515 }
5516 }
5517 }
5518
5519 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5520 N0.getValueType().isInteger()) {
5521 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5522 N0.getOpcode() == ISD::XOR) {
5523 // Simplify (X+Y) == (X+Z) --> Y == Z
5524 if (N0.getOpcode() == N1.getOpcode()) {
5525 if (N0.getOperand(0) == N1.getOperand(0))
5526 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5527 if (N0.getOperand(1) == N1.getOperand(1))
5528 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5529 if (isCommutativeBinOp(N0.getOpcode())) {
5530 // If X op Y == Y op X, try other combinations.
5531 if (N0.getOperand(0) == N1.getOperand(1))
5532 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5533 Cond);
5534 if (N0.getOperand(1) == N1.getOperand(0))
5535 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5536 Cond);
5537 }
5538 }
5539
5540 // If RHS is a legal immediate value for a compare instruction, we need
5541 // to be careful about increasing register pressure needlessly.
5542 bool LegalRHSImm = false;
5543
5544 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5545 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5546 // Turn (X+C1) == C2 --> X == C2-C1
5547 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5548 return DAG.getSetCC(
5549 dl, VT, N0.getOperand(0),
5550 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5551 dl, N0.getValueType()),
5552 Cond);
5553
5554 // Turn (X^C1) == C2 --> X == C1^C2
5555 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5556 return DAG.getSetCC(
5557 dl, VT, N0.getOperand(0),
5558 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5559 dl, N0.getValueType()),
5560 Cond);
5561 }
5562
5563 // Turn (C1-X) == C2 --> X == C1-C2
5564 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5565 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5566 return DAG.getSetCC(
5567 dl, VT, N0.getOperand(1),
5568 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5569 dl, N0.getValueType()),
5570 Cond);
5571
5572 // Could RHSC fold directly into a compare?
5573 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5574 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5575 }
5576
5577 // (X+Y) == X --> Y == 0 and similar folds.
5578 // Don't do this if X is an immediate that can fold into a cmp
5579 // instruction and X+Y has other uses. It could be an induction variable
5580 // chain, and the transform would increase register pressure.
5581 if (!LegalRHSImm || N0.hasOneUse())
5582 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5583 return V;
5584 }
5585
5586 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5587 N1.getOpcode() == ISD::XOR)
5588 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5589 return V;
5590
5591 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5592 return V;
5593
5594 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5595 return V;
5596 }
5597
5598 // Fold remainder of division by a constant.
5599 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5600 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5601 // When division is cheap or optimizing for minimum size,
5602 // fall through to DIVREM creation by skipping this fold.
5603 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5604 if (N0.getOpcode() == ISD::UREM) {
5605 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5606 return Folded;
5607 } else if (N0.getOpcode() == ISD::SREM) {
5608 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5609 return Folded;
5610 }
5611 }
5612 }
5613
5614 // Fold away ALL boolean setcc's.
5615 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5616 SDValue Temp;
5617 switch (Cond) {
5618 default: llvm_unreachable("Unknown integer setcc!");
5619 case ISD::SETEQ: // X == Y -> ~(X^Y)
5620 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5621 N0 = DAG.getNOT(dl, Temp, OpVT);
5622 if (!DCI.isCalledByLegalizer())
5623 DCI.AddToWorklist(Temp.getNode());
5624 break;
5625 case ISD::SETNE: // X != Y --> (X^Y)
5626 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5627 break;
5628 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5629 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5630 Temp = DAG.getNOT(dl, N0, OpVT);
5631 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5632 if (!DCI.isCalledByLegalizer())
5633 DCI.AddToWorklist(Temp.getNode());
5634 break;
5635 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5636 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5637 Temp = DAG.getNOT(dl, N1, OpVT);
5638 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5639 if (!DCI.isCalledByLegalizer())
5640 DCI.AddToWorklist(Temp.getNode());
5641 break;
5642 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5643 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5644 Temp = DAG.getNOT(dl, N0, OpVT);
5645 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5646 if (!DCI.isCalledByLegalizer())
5647 DCI.AddToWorklist(Temp.getNode());
5648 break;
5649 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5650 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5651 Temp = DAG.getNOT(dl, N1, OpVT);
5652 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5653 break;
5654 }
5655 if (VT.getScalarType() != MVT::i1) {
5656 if (!DCI.isCalledByLegalizer())
5657 DCI.AddToWorklist(N0.getNode());
5658 // FIXME: If running after legalize, we probably can't do this.
5660 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5661 }
5662 return N0;
5663 }
5664
5665 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5666 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5667 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5669 N1->getFlags().hasNoUnsignedWrap()) ||
5671 N1->getFlags().hasNoSignedWrap())) &&
5673 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5674 }
5675
5676 // Could not fold it.
5677 return SDValue();
5678}
5679
5680/// Returns true (and the GlobalValue and the offset) if the node is a
5681/// GlobalAddress + offset.
// Peels target-specific wrappers via unwrapAddress(), then matches either a
// bare GlobalAddressSDNode or an add (N->isAnyAdd(), so plain and pointer
// adds) of a recursively-matched global plus a constant operand.
// Offset is accumulated (+=) rather than assigned, so callers should
// zero-initialize it before the first call.
// NOTE(review): the first line of the signature (orig line 5682) is missing
// from this listing; per the recursive calls below it takes
// (SDNode *WN, const GlobalValue *&GA, int64_t &Offset).
 5683 int64_t &Offset) const {
5684
5685 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5686
 // Base case: the node itself is a global address; report it plus its
 // baked-in offset.
5687 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5688 GA = GASD->getGlobal();
5689 Offset += GASD->getOffset();
5690 return true;
5691 }
5692
 // Recursive case: (GA + C) or (C + GA), where either operand may itself
 // be a global-plus-offset expression.
5693 if (N->isAnyAdd()) {
5694 SDValue N1 = N->getOperand(0);
5695 SDValue N2 = N->getOperand(1);
5696 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5697 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5698 Offset += V->getSExtValue();
5699 return true;
5700 }
5701 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5702 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5703 Offset += V->getSExtValue();
5704 return true;
5705 }
5706 }
5707 }
5708
5709 return false;
5710}
5711
// Target hook called by the DAG combiner for target-specific node combines.
// The base implementation performs no transformation: returning an empty
// SDValue tells the combiner "no change".
// NOTE(review): the first line of the signature (orig line 5712) is missing
// from this listing.
 5713 DAGCombinerInfo &DCI) const {
5714 // Default implementation: no optimization.
5715 return SDValue();
5716}
5717
5718//===----------------------------------------------------------------------===//
5719// Inline Assembler Implementation Methods
5720//===----------------------------------------------------------------------===//
5721
// Classify an inline-asm constraint string into a ConstraintType bucket
// (register class, memory, address, immediate, register, other, or unknown).
// Single-letter GCC-style constraints are matched by a switch; brace-enclosed
// strings ("{regname}") denote a specific physical register, with "{memory}"
// special-cased as a memory constraint.
// NOTE(review): the signature lines (orig 5722-5723) are missing from this
// listing; this is TargetLowering::getConstraintType(StringRef Constraint)
// per the callers visible below (e.g. ComputeConstraintToUse) — confirm
// against the header.
 5724 unsigned S = Constraint.size();
5725
5726 if (S == 1) {
5727 switch (Constraint[0]) {
5728 default: break;
5729 case 'r':
5730 return C_RegisterClass;
5731 case 'm': // memory
5732 case 'o': // offsetable
5733 case 'V': // not offsetable
5734 return C_Memory;
5735 case 'p': // Address.
5736 return C_Address;
5737 case 'n': // Simple Integer
5738 case 'E': // Floating Point Constant
5739 case 'F': // Floating Point Constant
5740 return C_Immediate;
5741 case 'i': // Simple Integer or Relocatable Constant
5742 case 's': // Relocatable Constant
5743 case 'X': // Allow ANY value.
5744 case 'I': // Target registers.
5745 case 'J':
5746 case 'K':
5747 case 'L':
5748 case 'M':
5749 case 'N':
5750 case 'O':
5751 case 'P':
5752 case '<':
5753 case '>':
5754 return C_Other;
5755 }
5756 }
5757
 // Brace-enclosed constraint: a specific register name, except the special
 // "{memory}" clobber which is treated as memory.
5758 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5759 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5760 return C_Memory;
5761 return C_Register;
5762 }
5763 return C_Unknown;
5764}
5765
5766/// Try to replace an X constraint, which matches anything, with another that
5767/// has more specific requirements based on the type of the corresponding
5768/// operand.
5769const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5770 if (ConstraintVT.isInteger())
5771 return "r";
5772 if (ConstraintVT.isFloatingPoint())
5773 return "f"; // works for many targets
5774 return nullptr;
5775}
5776
// Target hook for lowering an inline-asm output operand described by OpInfo;
// the base implementation lowers nothing and returns an empty SDValue,
// leaving targets to override it. Presumably used for flag-style asm
// outputs — confirm against the TargetLowering header.
// NOTE(review): the first line of the signature (orig line 5777) is missing
// from this listing.
 5778 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5779 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5780 return SDValue();
5781}
5782
5783/// Lower the specified operand into the Ops vector.
5784/// If it is invalid, don't add anything to Ops.
// Handles the 'X', 'i', 'n', and 's' single-letter constraints by walking
// sums/differences of constants, global addresses, and block addresses,
// folding the accumulated constant displacement into the final target node.
// Leaving Ops empty signals an unsupported operand to the caller.
// NOTE(review): the first signature line (orig 5785) is missing from this
// listing.
 5786 StringRef Constraint,
5787 std::vector<SDValue> &Ops,
5788 SelectionDAG &DAG) const {
5789
 // Only single-letter constraints are handled here.
5790 if (Constraint.size() > 1)
5791 return;
5792
5793 char ConstraintLetter = Constraint[0];
5794 switch (ConstraintLetter) {
5795 default: break;
5796 case 'X': // Allows any operand
5797 case 'i': // Simple Integer or Relocatable Constant
5798 case 'n': // Simple Integer
5799 case 's': { // Relocatable Constant
5800
 // NOTE(review): the declaration of C (a ConstantSDNode pointer assigned in
 // the dyn_casts below, orig line 5801) is missing from this listing.
5802 uint64_t Offset = 0;
5803
5804 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5805 // etc., since getelementpointer is variadic. We can't use
5806 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5807 // while in this case the GA may be furthest from the root node which is
5808 // likely an ISD::ADD.
5809 while (true) {
5810 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5811 // gcc prints these as sign extended. Sign extend value to 64 bits
5812 // now; without this it would get ZExt'd later in
5813 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5814 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5815 BooleanContent BCont = getBooleanContents(MVT::i64);
5816 ISD::NodeType ExtOpc =
5817 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5818 int64_t ExtVal =
5819 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5820 Ops.push_back(
5821 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5822 return;
5823 }
 // 'n' accepts only plain integers; symbolic operands are allowed for
 // the remaining letters.
5824 if (ConstraintLetter != 'n') {
5825 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5826 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5827 GA->getValueType(0),
5828 Offset + GA->getOffset()));
5829 return;
5830 }
5831 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5832 Ops.push_back(DAG.getTargetBlockAddress(
5833 BA->getBlockAddress(), BA->getValueType(0),
5834 Offset + BA->getOffset(), BA->getTargetFlags()));
5835 return;
5836 }
 // NOTE(review): a guarding condition (orig line 5837) preceding this
 // fallback push is missing from this listing — confirm against the
 // upstream source before relying on when Op is pushed unchanged.
5838 Ops.push_back(Op);
5839 return;
5840 }
5841 }
 // Peel one level of ADD/SUB with a constant operand, folding the
 // constant into Offset (negated for SUB) and continuing the walk.
5842 const unsigned OpCode = Op.getOpcode();
5843 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5844 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5845 Op = Op.getOperand(1);
5846 // Subtraction is not commutative.
5847 else if (OpCode == ISD::ADD &&
5848 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5849 Op = Op.getOperand(0);
5850 else
5851 return;
5852 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5853 continue;
5854 }
5855 return;
5856 }
5857 break;
5858 }
5859 }
5860}
5861
5865
// Resolve a "{regname}" inline-asm constraint to a (physical register,
// register class) pair by scanning every legal register class for a register
// whose asm name matches (case-insensitively). Prefers a class for which VT
// is legal; otherwise returns the first matching class found, or
// (0, nullptr) when nothing matches.
// NOTE(review): the line naming this as
// TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, ...)
// (orig 5867) is missing from this listing — confirm against the header.
5866std::pair<unsigned, const TargetRegisterClass *>
 5868 StringRef Constraint,
5869 MVT VT) const {
 // Anything not brace-enclosed is not a specific-register constraint.
5870 if (!Constraint.starts_with("{"))
5871 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5872 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5873
5874 // Remove the braces from around the name.
5875 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5876
 // R holds the best fallback match found so far (first legal class whose
 // register name matched, even if VT is not legal for it).
5877 std::pair<unsigned, const TargetRegisterClass *> R =
5878 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5879
5880 // Figure out which register class contains this reg.
5881 for (const TargetRegisterClass *RC : RI->regclasses()) {
5882 // If none of the value types for this register class are valid, we
5883 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5884 if (!isLegalRC(*RI, *RC))
5885 continue;
5886
5887 for (const MCPhysReg &PR : *RC) {
5888 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5889 std::pair<unsigned, const TargetRegisterClass *> S =
5890 std::make_pair(PR, RC);
5891
5892 // If this register class has the requested value type, return it,
5893 // otherwise keep searching and return the first class found
5894 // if no other is found which explicitly has the requested type.
5895 if (RI->isTypeLegalForClass(*RC, VT))
5896 return S;
5897 if (!R.second)
5898 R = S;
5899 }
5900 }
5901 }
5902
5903 return R;
5904}
5905
5906//===----------------------------------------------------------------------===//
5907// Constraint Selection.
5908
5909/// Return true if this is an input operand that is a matching constraint
5910/// like "4" (i.e. the constraint code starts with a digit).
// NOTE(review): the signature line (orig 5911) is missing from this listing.
 5912 assert(!ConstraintCode.empty() && "No known constraint!");
 // The cast to unsigned char avoids UB in isdigit() for negative chars.
5913 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5914}
5915
5916/// If this is an input matching constraint, this method returns the output
5917/// operand it matches.
// NOTE(review): the signature line (orig 5918) is missing from this listing.
// Parses the leading decimal number of the constraint code (e.g. "4") as the
// matched output operand index.
 5919 assert(!ConstraintCode.empty() && "No known constraint!");
5920 return atoi(ConstraintCode.c_str());
5921}
5922
5923/// Split up the constraint string from the inline assembly value into the
5924/// specific constraints and their prefixes, and also tie in the associated
5925/// operand values.
5926/// If this returns an empty vector, and if the constraint string itself
5927/// isn't empty, there was an error parsing.
// Three phases: (1) prepass computing a value type for each parsed
// constraint; (2) if multiple alternatives exist, pick the best-weighted
// alternative; (3) validate that tied (matching) operands have compatible
// types, aborting compilation otherwise.
// NOTE(review): the signature lines (orig 5928-5929) are missing from this
// listing.
 5930 const TargetRegisterInfo *TRI,
5931 const CallBase &Call) const {
5932 /// Information about all of the constraints.
5933 AsmOperandInfoVector ConstraintOperands;
5934 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5935 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5936
5937 // Do a prepass over the constraints, canonicalizing them, and building up the
5938 // ConstraintOperands list.
5939 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5940 unsigned ResNo = 0; // ResNo - The result number of the next output.
5941 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5942
5943 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5944 ConstraintOperands.emplace_back(std::move(CI));
5945 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5946
5947 // Update multiple alternative constraint count.
5948 if (OpInfo.multipleAlternatives.size() > maCount)
5949 maCount = OpInfo.multipleAlternatives.size();
5950
5951 OpInfo.ConstraintVT = MVT::Other;
5952
5953 // Compute the value type for each operand.
5954 switch (OpInfo.Type) {
 // NOTE(review): the `case InlineAsm::isOutput:` label (orig 5955) is
 // missing from this listing.
5956 // Indirect outputs just consume an argument.
5957 if (OpInfo.isIndirect) {
5958 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5959 break;
5960 }
5961
5962 // The return value of the call is this value. As such, there is no
5963 // corresponding argument.
5964 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5965 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5966 OpInfo.ConstraintVT =
5967 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5968 .getSimpleVT();
5969 } else {
5970 assert(ResNo == 0 && "Asm only has one result!");
5971 OpInfo.ConstraintVT =
 // NOTE(review): the assignment's right-hand side (orig 5972) is
 // missing from this listing.
5973 }
5974 ++ResNo;
5975 break;
5976 case InlineAsm::isInput:
5977 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5978 break;
5979 case InlineAsm::isLabel:
5980 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5981 ++LabelNo;
 // Labels carry no value type and consume no argument slot.
5982 continue;
 // NOTE(review): the `case InlineAsm::isClobber:` label (orig 5983) is
 // missing from this listing.
5984 // Nothing to do.
5985 break;
5986 }
5987
5988 if (OpInfo.CallOperandVal) {
5989 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5990 if (OpInfo.isIndirect) {
5991 OpTy = Call.getParamElementType(ArgNo);
5992 assert(OpTy && "Indirect operand must have elementtype attribute");
5993 }
5994
5995 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5996 if (StructType *STy = dyn_cast<StructType>(OpTy))
5997 if (STy->getNumElements() == 1)
5998 OpTy = STy->getElementType(0);
5999
6000 // If OpTy is not a single value, it may be a struct/union that we
6001 // can tile with integers.
6002 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6003 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6004 switch (BitSize) {
6005 default: break;
6006 case 1:
6007 case 8:
6008 case 16:
6009 case 32:
6010 case 64:
6011 case 128:
6012 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6013 break;
6014 }
6015 }
6016
6017 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6018 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6019 ArgNo++;
6020 }
6021 }
6022
6023 // If we have multiple alternative constraints, select the best alternative.
6024 if (!ConstraintOperands.empty()) {
6025 if (maCount) {
6026 unsigned bestMAIndex = 0;
6027 int bestWeight = -1;
6028 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6029 int weight = -1;
6030 unsigned maIndex;
6031 // Compute the sums of the weights for each alternative, keeping track
6032 // of the best (highest weight) one so far.
6033 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6034 int weightSum = 0;
6035 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6036 cIndex != eIndex; ++cIndex) {
6037 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6038 if (OpInfo.Type == InlineAsm::isClobber)
6039 continue;
6040
6041 // If this is an output operand with a matching input operand,
6042 // look up the matching input. If their types mismatch, e.g. one
6043 // is an integer, the other is floating point, or their sizes are
6044 // different, flag it as an maCantMatch.
6045 if (OpInfo.hasMatchingInput()) {
6046 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6047 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6048 if ((OpInfo.ConstraintVT.isInteger() !=
6049 Input.ConstraintVT.isInteger()) ||
6050 (OpInfo.ConstraintVT.getSizeInBits() !=
6051 Input.ConstraintVT.getSizeInBits())) {
6052 weightSum = -1; // Can't match.
6053 break;
6054 }
6055 }
6056 }
6057 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6058 if (weight == -1) {
6059 weightSum = -1;
6060 break;
6061 }
6062 weightSum += weight;
6063 }
6064 // Update best.
6065 if (weightSum > bestWeight) {
6066 bestWeight = weightSum;
6067 bestMAIndex = maIndex;
6068 }
6069 }
6070
6071 // Now select chosen alternative in each constraint.
6072 for (AsmOperandInfo &cInfo : ConstraintOperands)
6073 if (cInfo.Type != InlineAsm::isClobber)
6074 cInfo.selectAlternative(bestMAIndex);
6075 }
6076 }
6077
6078 // Check and hook up tied operands, choose constraint code to use.
6079 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6080 cIndex != eIndex; ++cIndex) {
6081 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6082
6083 // If this is an output operand with a matching input operand, look up the
6084 // matching input. If their types mismatch, e.g. one is an integer, the
6085 // other is floating point, or their sizes are different, flag it as an
6086 // error.
6087 if (OpInfo.hasMatchingInput()) {
6088 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6089
6090 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6091 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6092 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6093 OpInfo.ConstraintVT);
6094 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6095 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6096 Input.ConstraintVT);
6097 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6098 OpInfo.ConstraintVT.isFloatingPoint();
6099 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6100 Input.ConstraintVT.isFloatingPoint();
6101 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6102 (MatchRC.second != InputRC.second)) {
6103 report_fatal_error("Unsupported asm: input constraint"
6104 " with a matching output constraint of"
6105 " incompatible type!");
6106 }
6107 }
6108 }
6109 }
6110
6111 return ConstraintOperands;
6112}
6113
6114/// Return a number indicating our preference for choosing a type of constraint
6115/// over another, for the purpose of sorting them. Immediates are almost always
6116/// preferable (when they can be emitted). A higher return value means a
6117/// stronger preference for one constraint type relative to another.
6118/// FIXME: We should prefer registers over memory but doing so may lead to
6119/// unrecoverable register exhaustion later.
6120/// https://github.com/llvm/llvm-project/issues/20571
// NOTE(review): the signature line (orig 6121) and all of the `case` labels
// (orig 6123-6124, 6126-6127, 6129, 6131, 6133) are missing from this
// listing. Per the doc above, larger return values correspond to
// more-preferred constraint kinds, with immediates highest — confirm the
// exact label-to-value mapping against the upstream source.
 6122 switch (CT) {
 6125 return 4;
 6128 return 3;
 6130 return 2;
 6132 return 1;
 6134 return 0;
6135 }
6136 llvm_unreachable("Invalid constraint type");
6137}
6138
6139/// Examine constraint type and operand type and determine a weight value.
6140/// This object must already have been set up with the operand type
6141/// and the current alternative constraint selected.
// Returns the maximum single-constraint weight over all constraint codes of
// the selected alternative (or of info.Codes when maIndex is out of range).
// NOTE(review): the signature first line (orig 6142-6143) and the
// declaration of rCodes (orig 6145) are missing from this listing.
 6144 AsmOperandInfo &info, int maIndex) const {
6146 if (maIndex >= (int)info.multipleAlternatives.size())
6147 rCodes = &info.Codes;
6148 else
6149 rCodes = &info.multipleAlternatives[maIndex].Codes;
6150 ConstraintWeight BestWeight = CW_Invalid;
6151
6152 // Loop over the options, keeping track of the most general one.
6153 for (const std::string &rCode : *rCodes) {
6154 ConstraintWeight weight =
6155 getSingleConstraintMatchWeight(info, rCode.c_str());
6156 if (weight > BestWeight)
6157 BestWeight = weight;
6158 }
6159
6160 return BestWeight;
6161}
6162
6163/// Examine constraint type and operand type and determine a weight value.
6164/// This object must already have been set up with the operand type
6165/// and the current alternative constraint selected.
// Maps one GCC-style constraint letter plus the operand's IR value kind to a
// ConstraintWeight bucket (constant / memory / register / default).
// NOTE(review): the signature first line (orig 6166-6167) and the
// initialization of `weight` (orig 6169) are missing from this listing.
 6168 AsmOperandInfo &info, const char *constraint) const {
6170 Value *CallOperandVal = info.CallOperandVal;
6171 // If we don't have a value, we can't do a match,
6172 // but allow it at the lowest weight.
6173 if (!CallOperandVal)
6174 return CW_Default;
6175 // Look at the constraint type.
6176 switch (*constraint) {
6177 case 'i': // immediate integer.
6178 case 'n': // immediate integer with a known value.
6179 if (isa<ConstantInt>(CallOperandVal))
6180 weight = CW_Constant;
6181 break;
6182 case 's': // non-explicit integral immediate.
6183 if (isa<GlobalValue>(CallOperandVal))
6184 weight = CW_Constant;
6185 break;
6186 case 'E': // immediate float if host format.
6187 case 'F': // immediate float.
6188 if (isa<ConstantFP>(CallOperandVal))
6189 weight = CW_Constant;
6190 break;
6191 case '<': // memory operand with autodecrement.
6192 case '>': // memory operand with autoincrement.
6193 case 'm': // memory operand.
6194 case 'o': // offsettable memory operand
6195 case 'V': // non-offsettable memory operand
6196 weight = CW_Memory;
6197 break;
6198 case 'r': // general register.
6199 case 'g': // general register, memory operand or immediate integer.
6200 // note: Clang converts "g" to "imr".
6201 if (CallOperandVal->getType()->isIntegerTy())
6202 weight = CW_Register;
6203 break;
6204 case 'X': // any operand.
6205 default:
6206 weight = CW_Default;
6207 break;
6208 }
6209 return weight;
6210}
6211
6212/// If there are multiple different constraints that we could pick for this
6213/// operand (e.g. "imr") try to pick the 'best' one.
6214/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6215/// into seven classes:
6216/// Register -> one specific register
6217/// RegisterClass -> a group of regs
6218/// Memory -> memory
6219/// Address -> a symbolic memory reference
6220/// Immediate -> immediate values
6221/// Other -> magic values (such as "Flag Output Operands")
6222/// Unknown -> something we don't recognize yet and can't handle
6223/// Ideally, we would pick the most specific constraint possible: if we have
6224/// something that fits into a register, we would pick it. The problem here
6225/// is that if we have something that could either be in a register or in
6226/// memory that use of the register could cause selection of *other*
6227/// operands to fail: they might only succeed if we pick memory. Because of
6228/// this the heuristic we use is:
6229///
6230/// 1) If there is an 'other' constraint, and if the operand is valid for
6231/// that constraint, use it. This makes us take advantage of 'i'
6232/// constraints when available.
6233/// 2) Otherwise, pick the most general constraint present. This prefers
6234/// 'm' over 'r', for example.
6235///
// Builds the filtered list of (code, type) pairs, then sorts it by
// getConstraintPiority so higher-priority kinds come first.
// NOTE(review): the signature first line (orig 6236) is missing from this
// listing.
 6237 TargetLowering::AsmOperandInfo &OpInfo) const {
6238 ConstraintGroup Ret;
6239
6240 Ret.reserve(OpInfo.Codes.size());
6241 for (StringRef Code : OpInfo.Codes) {
 // NOTE(review): the declaration of CType (the classified constraint type
 // for Code, orig 6242) is missing from this listing.
6243
6244 // Indirect 'other' or 'immediate' constraints are not allowed.
6245 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6246 CType == TargetLowering::C_Register ||
 // NOTE(review): the last disjunct of this condition (orig 6247) is
 // missing from this listing.
6248 continue;
6249
6250 // Things with matching constraints can only be registers, per gcc
6251 // documentation. This mainly affects "g" constraints.
6252 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6253 continue;
6254
6255 Ret.emplace_back(Code, CType);
6256 }
6257
 // NOTE(review): the sort call opening this comparator lambda (orig 6258)
 // is missing from this listing.
6259 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6260 });
6261
6262 return Ret;
6263}
6264
6265/// If we have an immediate, see if we can lower it. Return true if we can,
6266/// false otherwise.
// Probes whether LowerAsmOperandForConstraint accepts Op for the constraint
// code in P.first: success is signaled by at least one lowered operand being
// produced.
// NOTE(review): the first signature line (orig 6267) is missing from this
// listing.
 6268 SDValue Op, SelectionDAG *DAG,
6269 const TargetLowering &TLI) {
6270
6271 assert((P.second == TargetLowering::C_Other ||
6272 P.second == TargetLowering::C_Immediate) &&
6273 "need immediate or other");
6274
 // No SDNode means there is nothing to lower.
6275 if (!Op.getNode())
6276 return false;
6277
6278 std::vector<SDValue> ResultOps;
6279 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6280 return !ResultOps.empty();
6281}
6282
6283/// Determines the constraint code and constraint type to use for the specific
6284/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
// For multi-code constraints, walks the preference-sorted group: immediate/
// 'other' codes are kept only if the operand actually lowers for them,
// otherwise the first (most preferred) code wins. A residual 'X' constraint
// is then narrowed based on the operand's kind and value type.
// NOTE(review): the first signature line (orig 6285) is missing from this
// listing.
 6286 SDValue Op,
6287 SelectionDAG *DAG) const {
6288 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6289
6290 // Single-letter constraints ('r') are very common.
6291 if (OpInfo.Codes.size() == 1) {
6292 OpInfo.ConstraintCode = OpInfo.Codes[0];
6293 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6294 } else {
 // NOTE(review): the declaration of G (the preference-sorted constraint
 // group for OpInfo, orig 6295) is missing from this listing.
6296 if (G.empty())
6297 return;
6298
6299 unsigned BestIdx = 0;
 // Immediate/'other' entries sort first; accept the first one that the
 // operand can actually be lowered for.
6300 for (const unsigned E = G.size();
6301 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6302 G[BestIdx].second == TargetLowering::C_Immediate);
6303 ++BestIdx) {
6304 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6305 break;
6306 // If we're out of constraints, just pick the first one.
6307 if (BestIdx + 1 == E) {
6308 BestIdx = 0;
6309 break;
6310 }
6311 }
6312
6313 OpInfo.ConstraintCode = G[BestIdx].first;
6314 OpInfo.ConstraintType = G[BestIdx].second;
6315 }
6316
6317 // 'X' matches anything.
6318 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6319 // Constants are handled elsewhere. For Functions, the type here is the
6320 // type of the result, which is not what we want to look at; leave them
6321 // alone.
6322 Value *v = OpInfo.CallOperandVal;
6323 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6324 return;
6325 }
6326
 // Basic blocks and block addresses are relocatable: use 'i'.
6327 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6328 OpInfo.ConstraintCode = "i";
6329 return;
6330 }
6331
6332 // Otherwise, try to resolve it to something we know about by looking at
6333 // the actual operand type.
6334 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6335 OpInfo.ConstraintCode = Repl;
6336 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6337 }
6338 }
6339}
6340
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be handled elsewhere.
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    // Split the divisor into an odd factor and a power-of-two shift.
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // The remaining odd factor is invertible modulo 2^BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  // Shift out the power-of-two part first; the 'exact' flag guarantees no
  // set bits are discarded.
  if (UseSRA) {
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6400
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be handled elsewhere.
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    // Split the divisor into an odd factor and a power-of-two shift.
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.lshrInPlace(Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  SDValue Op1 = N->getOperand(1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = N->getOperand(0);
  // Shift out the power-of-two part first; the 'exact' flag guarantees no
  // set bits are discarded.
  if (UseSRL) {
    Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6461
                                    SelectionDAG &DAG,
                                    SmallVectorImpl<SDNode *> &Created) const {
  // Default implementation: if the target considers a real divide cheap for
  // this type/function, keep the SDIV node; otherwise signal "no custom
  // lowering" so the generic expansion runs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
6470
SDValue
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const {
  // Default implementation: if the target considers a real divide cheap for
  // this type/function, keep the SREM node; otherwise signal "no custom
  // lowering" so the generic expansion runs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
6480
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  // k = log2(|divisor|); caller guarantees the divisor is +/-2**k.
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
                            DAG.getShiftAmountConstant(Lg2, VT, DL));

  // If we're dividing by a positive value, we're done. Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  // neg res, res: computed as 0 - (x >> k).
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6523
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element magic multipliers / fixups (Hacker's Delight, ch. 10).
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    // No way to compute the high half of the product.
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6691
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(*this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is UB; leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
          Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For NPQ (add-fixup) lanes, multiplying by 2^(W-1) in MULHU acts as
      // an SRL-by-1; non-NPQ lanes multiply by zero.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    // Scalar path: NPQFactor is unused (the scalar NPQ fixup uses SRL by 1).
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Division by 1 was left alone above; select the raw numerator for those
  // lanes.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6891
/// If all values in Values that *don't* match the predicate are same 'splat'
/// value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
static void
                           std::function<bool(SDValue)> Predicate,
                           SDValue AlternativeReplacement = SDValue()) {
  SDValue Replacement;
  // Is there a value for which the Predicate does *NOT* match? What is it?
  auto SplatValue = llvm::find_if_not(Values, Predicate);
  if (SplatValue != Values.end()) {
    // Does Values consist only of SplatValue's and values matching Predicate?
    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
          return Value == *SplatValue || Predicate(Value);
        })) // Then we shall replace values matching predicate with SplatValue.
      Replacement = *SplatValue;
  }
  if (!Replacement) {
    // Oops, we did not find the "baseline" splat value.
    if (!AlternativeReplacement)
      return; // Nothing to do.
    // Let's replace with provided value then.
    Replacement = AlternativeReplacement;
  }
  // In-place rewrite of every matching element.
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
6919
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  // On success, queue every node the fold created for combiner revisiting.
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}
6940
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-lane bookkeeping: decides whether the fold is profitable and which
  // post-fixups are required.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;

  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                              /*implicitTrunc=*/true),
                        DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
                                 DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
7161
7162/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7163/// where the divisor is constant and the comparison target is zero,
7164/// return a DAG expression that will generate the same comparison result
7165/// using only multiplications, additions and shifts/rotations.
7166/// Ref: "Hacker's Delight" 10-17.
///
/// Thin wrapper around prepareSREMEqFold(): on success, every node the fold
/// created is queued on the DAGCombiner worklist so it gets revisited, and
/// the folded value is returned; otherwise returns a null SDValue.
7167SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7168 SDValue CompTargetNode,
7170 DAGCombinerInfo &DCI,
7171 const SDLoc &DL) const {
7173 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7174 DCI, DL, Built)) {
// prepareSREMEqFold() is expected to create at most 7 nodes; keep the
// worklist bookkeeping honest.
7175 assert(Built.size() <= 7 && "Max size prediction failed.");
7176 for (SDNode *N : Built)
7177 DCI.AddToWorklist(N);
7178 return Folded;
7179 }
7180
// Fold did not apply (non-constant divisor, unavailable ops, etc.).
7181 return SDValue();
7182}
7183
/// Implementation of buildSREMEqFold(): attempt the actual DAG rewrite and
/// record every node it creates in \p Created. Returns a null SDValue when
/// the fold is not applicable or a required operation is unavailable after
/// op legalization.
7184SDValue
7185TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7186 SDValue CompTargetNode, ISD::CondCode Cond,
7187 DAGCombinerInfo &DCI, const SDLoc &DL,
7188 SmallVectorImpl<SDNode *> &Created) const {
7189 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7190 // Fold:
7191 // (seteq/ne (srem N, D), 0)
7192 // To:
7193 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7194 //
7195 // - D must be constant, with D = D0 * 2^K where D0 is odd
7196 // - P is the multiplicative inverse of D0 modulo 2^W
7197 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7198 // - Q = floor((2 * A) / (2^K))
7199 // where W is the width of the common type of N and D.
7200 //
7201 // When D is a power of two (and thus D0 is 1), the normal
7202 // formula for A and Q don't apply, because the derivation
7203 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7204 // does not apply. This specifically fails when N = INT_MIN.
7205 //
7206 // Instead, for power-of-two D, we use:
7207 // - A = 2^(W-1)
7208 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7209 // - Q = 2^(W-K) - 1
7210 // |-> Test that the top K bits are zero after rotation
7211 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7212 "Only applicable for (in)equality comparisons.");
7213
7214 SelectionDAG &DAG = DCI.DAG;
7215
7216 EVT VT = REMNode.getValueType();
7217 EVT SVT = VT.getScalarType();
7218 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7219 EVT ShSVT = ShVT.getScalarType();
7220
7221 // If we are after ops legalization, and MUL is unavailable, we can not
7222 // proceed.
7223 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7224 return SDValue();
7225
7226 // TODO: Could support comparing with non-zero too.
7227 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7228 if (!CompTarget || !CompTarget->isZero())
7229 return SDValue();
7230
// Aggregate facts about the divisor lanes, gathered by BuildSREMPattern
// below; they decide which pieces of the fold need to be emitted.
7231 bool HadIntMinDivisor = false;
7232 bool HadOneDivisor = false;
7233 bool AllDivisorsAreOnes = true;
7234 bool HadEvenDivisor = false;
7235 bool NeedToApplyOffset = false;
7236 bool AllDivisorsArePowerOfTwo = true;
7237 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7238
// Per-divisor analysis: derive the (P, A, K, Q) constants for one lane and
// append them to the PAmts/AAmts/KAmts/QAmts lists.
7239 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7240 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7241 if (C->isZero())
7242 return false;
7243
7244 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7245
7246 // WARNING: this fold is only valid for positive divisors!
7247 APInt D = C->getAPIntValue();
7248 if (D.isNegative())
7249 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7250
7251 HadIntMinDivisor |= D.isMinSignedValue();
7252
7253 // If all divisors are ones, we will prefer to avoid the fold.
7254 HadOneDivisor |= D.isOne();
7255 AllDivisorsAreOnes &= D.isOne();
7256
7257 // Decompose D into D0 * 2^K
7258 unsigned K = D.countr_zero();
7259 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7260 APInt D0 = D.lshr(K);
7261
7262 if (!D.isMinSignedValue()) {
7263 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7264 // we don't care about this lane in this fold, we'll special-handle it.
7265 HadEvenDivisor |= (K != 0);
7266 }
7267
7268 // D is a power-of-two if D0 is one. This includes INT_MIN.
7269 // If all divisors are power-of-two, we will prefer to avoid the fold.
7270 AllDivisorsArePowerOfTwo &= D0.isOne();
7271
7272 // P = inv(D0, 2^W)
7273 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7274 unsigned W = D.getBitWidth();
7275 APInt P = D0.multiplicativeInverse();
7276 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7277
7278 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7279 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7280 A.clearLowBits(K);
7281
7282 if (!D.isMinSignedValue()) {
7283 // If divisor INT_MIN, then we don't care about this lane in this fold,
7284 // we'll special-handle it.
7285 NeedToApplyOffset |= A != 0;
7286 }
7287
7288 // Q = floor((2 * A) / (2^K))
7289 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7290
7292 "We are expecting that A is always less than all-ones for SVT");
7294 "We are expecting that K is always less than all-ones for ShSVT");
7295
7296 // If D was a power of two, apply the alternate constant derivation.
7297 if (D0.isOne()) {
7298 // A = 2^(W-1)
7300 // - Q = 2^(W-K) - 1
7301 Q = APInt::getAllOnes(W - K).zext(W);
7302 }
7303
7304 // If the divisor is 1 the result can be constant-folded. Likewise, we
7305 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7306 if (D.isOne()) {
7307 // Set P, A and K to a bogus values so we can try to splat them.
7308 P = 0;
7309 A = -1;
7310 K = -1;
7311
7312 // x ?% 1 == 0 <--> true <--> x u<= -1
7313 Q = -1;
7314 }
7315
7316 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7317 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7318 KAmts.push_back(
7319 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7320 /*implicitTrunc=*/true),
7321 DL, ShSVT));
7322 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7323 return true;
7324 };
7325
7326 SDValue N = REMNode.getOperand(0);
7327 SDValue D = REMNode.getOperand(1);
7328
7329 // Collect the values from each element.
7330 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7331 return SDValue();
7332
7333 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7334 if (AllDivisorsAreOnes)
7335 return SDValue();
7336
7337 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7338 // since it can be best implemented as a bit test.
7339 if (AllDivisorsArePowerOfTwo)
7340 return SDValue();
7341
// Materialize the per-lane constants with the same shape as the divisor
// operand: build_vector, splat_vector, or plain scalar.
7342 SDValue PVal, AVal, KVal, QVal;
7343 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7344 if (HadOneDivisor) {
7345 // Try to turn PAmts into a splat, since we don't care about the values
7346 // that are currently '0'. If we can't, just keep '0'`s.
7348 // Try to turn AAmts into a splat, since we don't care about the
7349 // values that are currently '-1'. If we can't, change them to '0'`s.
7351 DAG.getConstant(0, DL, SVT));
7352 // Try to turn KAmts into a splat, since we don't care about the values
7353 // that are currently '-1'. If we can't, change them to '0'`s.
7355 DAG.getConstant(0, DL, ShSVT));
7356 }
7357
7358 PVal = DAG.getBuildVector(VT, DL, PAmts);
7359 AVal = DAG.getBuildVector(VT, DL, AAmts);
7360 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7361 QVal = DAG.getBuildVector(VT, DL, QAmts);
7362 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7363 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7364 QAmts.size() == 1 &&
7365 "Expected matchUnaryPredicate to return one element for scalable "
7366 "vectors");
7367 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7368 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7369 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7370 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7371 } else {
7372 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7373 PVal = PAmts[0];
7374 AVal = AAmts[0];
7375 KVal = KAmts[0];
7376 QVal = QAmts[0];
7377 }
7378
7379 // (mul N, P)
7380 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7381 Created.push_back(Op0.getNode());
7382
7383 if (NeedToApplyOffset) {
7384 // We need ADD to do this.
7385 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7386 return SDValue();
7387
7388 // (add (mul N, P), A)
7389 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7390 Created.push_back(Op0.getNode());
7391 }
7392
7393 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7394 // divisors as a performance improvement, since rotating by 0 is a no-op.
7395 if (HadEvenDivisor) {
7396 // We need ROTR to do this.
7397 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7398 return SDValue();
7399 // SREM: (rotr (add (mul N, P), A), K)
7400 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7401 Created.push_back(Op0.getNode());
7402 }
7403
7404 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7405 SDValue Fold =
7406 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7408
7409 // If we didn't have lanes with INT_MIN divisor, then we're done.
7410 if (!HadIntMinDivisor)
7411 return Fold;
7412
7413 // That fold is only valid for positive divisors. Which effectively means,
7414 // it is invalid for INT_MIN divisors. So if we have such a lane,
7415 // we must fix-up results for said lanes.
7416 assert(VT.isVector() && "Can/should only get here for vectors.");
7417
7418 // NOTE: we avoid letting illegal types through even if we're before legalize
7419 // ops – legalization has a hard time producing good code for the code that
7420 // follows.
7421 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7425 return SDValue();
7426
7427 Created.push_back(Fold.getNode());
7428
7429 SDValue IntMin = DAG.getConstant(
7431 SDValue IntMax = DAG.getConstant(
7433 SDValue Zero =
7435
7436 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7437 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7438 Created.push_back(DivisorIsIntMin.getNode());
7439
7440 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7441 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7442 Created.push_back(Masked.getNode());
7443 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7444 Created.push_back(MaskedIsZero.getNode());
7445
7446 // To produce final result we need to blend 2 vectors: 'SetCC' and
7447 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7448 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7449 // constant-folded, select can get lowered to a shuffle with constant mask.
7450 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7451 MaskedIsZero, Fold);
7452
7453 return Blended;
7454}
7455
// Build a boolean (setcc in CCVT) test for whether the FP input Op should be
// treated as zero given the function's denormal Mode:
// - when denormal inputs are flushed (PreserveSign / PositiveZero), a plain
//   Op == 0.0 compare suffices;
// - otherwise denormal inputs must also be caught, so test
//   fabs(Op) < smallest-normalized-value instead.
// NOTE(review): the defining signature line is not visible in this chunk;
// presumably this is TargetLowering::getSqrtInputTest — confirm in full file.
7457 const DenormalMode &Mode) const {
7458 SDLoc DL(Op);
7459 EVT VT = Op.getValueType();
7460 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7461 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7462
7463 // This is specifically a check for the handling of denormal inputs, not the
7464 // result.
7465 if (Mode.Input == DenormalMode::PreserveSign ||
7466 Mode.Input == DenormalMode::PositiveZero) {
7467 // Test = X == 0.0
7468 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7469 }
7470
7471 // Testing it with denormal inputs to avoid wrong estimate.
7472 //
7473 // Test = fabs(X) < SmallestNormal
7474 const fltSemantics &FltSem = VT.getFltSemantics();
7475 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7476 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7477 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7478 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7479}
7480
// Recursively build an expression equivalent to the negation of Op, when that
// can be done without extra cost. Dispatches on Op's opcode (constants,
// build_vector, fadd/fsub/fmul/fdiv, fma/fmad, fp_extend/round, fsin,
// select/vselect), negating whichever operand is cheaper and reporting the
// chosen cost through the Cost out-parameter. Speculatively created nodes
// that end up unused are deleted via RemoveDeadNode. Returns a null SDValue
// when no profitable negation is found.
7482 bool LegalOps, bool OptForSize,
7484 unsigned Depth) const {
7485 // fneg is removable even if it has multiple uses.
7486 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7488 return Op.getOperand(0);
7489 }
7490
7491 // Don't recurse exponentially.
7493 return SDValue();
7494
7495 // Pre-increment recursion depth for use in recursive calls.
7496 ++Depth;
7497 const SDNodeFlags Flags = Op->getFlags();
7498 const TargetOptions &Options = DAG.getTarget().Options;
7499 EVT VT = Op.getValueType();
7500 unsigned Opcode = Op.getOpcode();
7501
7502 // Don't allow anything with multiple uses unless we know it is free.
7503 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7504 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7505 isFPExtFree(VT, Op.getOperand(0).getValueType());
7506 if (!IsFreeExtend)
7507 return SDValue();
7508 }
7509
// Helper to delete a speculatively-built negation that lost the cost race.
7510 auto RemoveDeadNode = [&](SDValue N) {
7511 if (N && N.getNode()->use_empty())
7512 DAG.RemoveDeadNode(N.getNode());
7513 };
7514
7515 SDLoc DL(Op);
7516
7517 // Because getNegatedExpression can delete nodes we need a handle to keep
7518 // temporary nodes alive in case the recursion manages to create an identical
7519 // node.
7520 std::list<HandleSDNode> Handles;
7521
7522 switch (Opcode) {
7523 case ISD::ConstantFP: {
7524 // Don't invert constant FP values after legalization unless the target says
7525 // the negated constant is legal.
7526 bool IsOpLegal =
7528 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7529 OptForSize);
7530
7531 if (LegalOps && !IsOpLegal)
7532 break;
7533
7534 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7535 V.changeSign();
7536 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7537
7538 // If we already have the use of the negated floating constant, it is free
7539 // to negate it even it has multiple uses.
7540 if (!Op.hasOneUse() && CFP.use_empty())
7541 break;
7543 return CFP;
7544 }
7545 case ISD::BUILD_VECTOR: {
7546 // Only permit BUILD_VECTOR of constants.
7547 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7548 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7549 }))
7550 break;
7551
7552 bool IsOpLegal =
7555 llvm::all_of(Op->op_values(), [&](SDValue N) {
7556 return N.isUndef() ||
7557 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7558 OptForSize);
7559 });
7560
7561 if (LegalOps && !IsOpLegal)
7562 break;
7563
// Negate every constant lane; undef lanes pass through unchanged.
7565 for (SDValue C : Op->op_values()) {
7566 if (C.isUndef()) {
7567 Ops.push_back(C);
7568 continue;
7569 }
7570 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7571 V.changeSign();
7572 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7573 }
7575 return DAG.getBuildVector(VT, DL, Ops);
7576 }
7577 case ISD::FADD: {
7578 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7579 break;
7580
7581 // After operation legalization, it might not be legal to create new FSUBs.
7582 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7583 break;
7584 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7585
7586 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7588 SDValue NegX =
7589 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7590 // Prevent this node from being deleted by the next call.
7591 if (NegX)
7592 Handles.emplace_back(NegX);
7593
7594 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7596 SDValue NegY =
7597 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7598
7599 // We're done with the handles.
7600 Handles.clear();
7601
7602 // Negate the X if its cost is less or equal than Y.
7603 if (NegX && (CostX <= CostY)) {
7604 Cost = CostX;
7605 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7606 if (NegY != N)
7607 RemoveDeadNode(NegY);
7608 return N;
7609 }
7610
7611 // Negate the Y if it is not expensive.
7612 if (NegY) {
7613 Cost = CostY;
7614 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7615 if (NegX != N)
7616 RemoveDeadNode(NegX);
7617 return N;
7618 }
7619 break;
7620 }
7621 case ISD::FSUB: {
7622 // We can't turn -(A-B) into B-A when we honor signed zeros.
7623 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7624 break;
7625
7626 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7627 // fold (fneg (fsub 0, Y)) -> Y
7628 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7629 if (C->isZero()) {
7631 return Y;
7632 }
7633
7634 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7636 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7637 }
7638 case ISD::FMUL:
7639 case ISD::FDIV: {
7640 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7641
7642 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7644 SDValue NegX =
7645 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7646 // Prevent this node from being deleted by the next call.
7647 if (NegX)
7648 Handles.emplace_back(NegX);
7649
7650 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7652 SDValue NegY =
7653 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7654
7655 // We're done with the handles.
7656 Handles.clear();
7657
7658 // Negate the X if its cost is less or equal than Y.
7659 if (NegX && (CostX <= CostY)) {
7660 Cost = CostX;
7661 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7662 if (NegY != N)
7663 RemoveDeadNode(NegY);
7664 return N;
7665 }
7666
7667 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7668 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7669 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7670 break;
7671
7672 // Negate the Y if it is not expensive.
7673 if (NegY) {
7674 Cost = CostY;
7675 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7676 if (NegX != N)
7677 RemoveDeadNode(NegX);
7678 return N;
7679 }
7680 break;
7681 }
7682 case ISD::FMA:
7683 case ISD::FMAD: {
7684 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7685 break;
7686
// -(fma X, Y, Z) requires negating Z unconditionally, plus exactly one of
// X or Y — try both and keep the cheaper.
7687 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7689 SDValue NegZ =
7690 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7691 // Give up if fail to negate the Z.
7692 if (!NegZ)
7693 break;
7694
7695 // Prevent this node from being deleted by the next two calls.
7696 Handles.emplace_back(NegZ);
7697
7698 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7700 SDValue NegX =
7701 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7702 // Prevent this node from being deleted by the next call.
7703 if (NegX)
7704 Handles.emplace_back(NegX);
7705
7706 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7708 SDValue NegY =
7709 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7710
7711 // We're done with the handles.
7712 Handles.clear();
7713
7714 // Negate the X if its cost is less or equal than Y.
7715 if (NegX && (CostX <= CostY)) {
7716 Cost = std::min(CostX, CostZ);
7717 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7718 if (NegY != N)
7719 RemoveDeadNode(NegY);
7720 return N;
7721 }
7722
7723 // Negate the Y if it is not expensive.
7724 if (NegY) {
7725 Cost = std::min(CostY, CostZ);
7726 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7727 if (NegX != N)
7728 RemoveDeadNode(NegX);
7729 return N;
7730 }
7731 break;
7732 }
7733
7734 case ISD::FP_EXTEND:
7735 case ISD::FSIN:
// fneg commutes through fp_extend and fsin: negate the operand instead.
7736 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7737 OptForSize, Cost, Depth))
7738 return DAG.getNode(Opcode, DL, VT, NegV);
7739 break;
7740 case ISD::FP_ROUND:
7741 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7742 OptForSize, Cost, Depth))
7743 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7744 break;
7745 case ISD::SELECT:
7746 case ISD::VSELECT: {
7747 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7748 // iff at least one cost is cheaper and the other is neutral/cheaper
7749 SDValue LHS = Op.getOperand(1);
7751 SDValue NegLHS =
7752 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7753 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7754 RemoveDeadNode(NegLHS);
7755 break;
7756 }
7757
7758 // Prevent this node from being deleted by the next call.
7759 Handles.emplace_back(NegLHS);
7760
7761 SDValue RHS = Op.getOperand(2);
7763 SDValue NegRHS =
7764 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7765
7766 // We're done with the handles.
7767 Handles.clear();
7768
7769 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7770 (CostLHS != NegatibleCost::Cheaper &&
7771 CostRHS != NegatibleCost::Cheaper)) {
7772 RemoveDeadNode(NegLHS);
7773 RemoveDeadNode(NegRHS);
7774 break;
7775 }
7776
7777 Cost = std::min(CostLHS, CostRHS);
7778 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7779 }
7780 }
7781
7782 return SDValue();
7783}
7784
7785//===----------------------------------------------------------------------===//
7786// Legalization Utilities
7787//===----------------------------------------------------------------------===//
7788
// Expand a full-width multiply (ISD::MUL / UMUL_LOHI / SMUL_LOHI) on type VT
// into operations on the half-width type HiLoVT, pushing the result pieces
// onto Result (2 entries for MUL, 4 for the LOHI opcodes). LL/LH/RL/RH may
// optionally supply pre-split operand halves; otherwise they are derived
// here with TRUNCATE/SRL. Returns false when the target lacks any usable
// half-width multiply.
7789bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7790 SDValue LHS, SDValue RHS,
7792 EVT HiLoVT, SelectionDAG &DAG,
7793 MulExpansionKind Kind, SDValue LL,
7794 SDValue LH, SDValue RL, SDValue RH) const {
7795 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7796 Opcode == ISD::SMUL_LOHI);
7797
7798 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7800 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7802 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7804 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7806
7807 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7808 return false;
7809
7810 unsigned OuterBitSize = VT.getScalarSizeInBits();
7811 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7812
7813 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7814 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7815 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7816
// Emit one half-width multiply producing separate Lo and Hi halves,
// preferring the [SU]MUL_LOHI node and falling back to MUL + MULH[SU].
7817 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7818 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7819 bool Signed) -> bool {
7820 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7821 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7822 Hi = SDValue(Lo.getNode(), 1);
7823 return true;
7824 }
7825 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7826 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7827 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7828 return true;
7829 }
7830 return false;
7831 };
7832
7833 SDValue Lo, Hi;
7834
7835 if (!LL.getNode() && !RL.getNode() &&
7837 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7838 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7839 }
7840
7841 if (!LL.getNode())
7842 return false;
7843
7844 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7845 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7846 DAG.MaskedValueIsZero(RHS, HighMask)) {
7847 // The inputs are both zero-extended.
7848 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7849 Result.push_back(Lo);
7850 Result.push_back(Hi);
7851 if (Opcode != ISD::MUL) {
7852 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7853 Result.push_back(Zero);
7854 Result.push_back(Zero);
7855 }
7856 return true;
7857 }
7858 }
7859
7860 if (!VT.isVector() && Opcode == ISD::MUL &&
7861 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7862 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7863 // The input values are both sign-extended.
7864 // TODO non-MUL case?
7865 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7866 Result.push_back(Lo);
7867 Result.push_back(Hi);
7868 return true;
7869 }
7870 }
7871
7872 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7873 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7874
7875 if (!LH.getNode() && !RH.getNode() &&
7878 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7879 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7880 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7881 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7882 }
7883
7884 if (!LH.getNode())
7885 return false;
7886
// Schoolbook expansion: start with the low*low partial product.
7887 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7888 return false;
7889
7890 Result.push_back(Lo);
7891
7892 if (Opcode == ISD::MUL) {
// Plain MUL only needs the low OuterBitSize bits: the cross products
// contribute solely to the high half.
7893 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7894 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7895 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7896 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7897 Result.push_back(Hi);
7898 return true;
7899 }
7900
7901 // Compute the full width result.
7902 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7903 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7904 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7905 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7906 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7907 };
7908
7909 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7910 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7911 return false;
7912
7913 // This is effectively the add part of a multiply-add of half-sized operands,
7914 // so it cannot overflow.
7915 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7916
7917 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7918 return false;
7919
7920 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7921 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7922
// Prefer the glued ADDC/ADDE carry chain when the target has it; otherwise
// propagate the carry through UADDO_CARRY with a boolean-typed carry.
7923 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7925 if (UseGlue)
7926 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7927 Merge(Lo, Hi));
7928 else
7929 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7930 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7931
7932 SDValue Carry = Next.getValue(1);
7933 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7934 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7935
7936 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7937 return false;
7938
7939 if (UseGlue)
7940 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7941 Carry);
7942 else
7943 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7944 Zero, Carry);
7945
7946 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7947
7948 if (Opcode == ISD::SMUL_LOHI) {
// Signed correction of the unsigned product: if a high half was negative,
// subtract the other operand's (zero-extended) low half.
7949 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7950 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7951 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7952
7953 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7954 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7955 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7956 }
7957
7958 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7959 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7960 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7961 return true;
7962}
7963
// Convenience wrapper over the list-returning expandMUL_LOHI(): expands this
// node's multiply using its own opcode/type/operands and unpacks the two
// half-width results into Lo and Hi on success.
7965 SelectionDAG &DAG, MulExpansionKind Kind,
7966 SDValue LL, SDValue LH, SDValue RL,
7967 SDValue RH) const {
7969 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7970 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7971 DAG, Kind, LL, LH, RL, RH);
7972 if (Ok) {
// Only the plain-MUL path of expandMUL_LOHI produces exactly two pieces.
7973 assert(Result.size() == 2);
7974 Lo = Result[0];
7975 Hi = Result[1];
7976 }
7977 return Ok;
7978}
7979
7980// Optimize unsigned division or remainder by constants for types twice as large
7981// as a legal VT.
7982//
7983// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7984// can be computed
7985// as:
7986// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7987// Remainder = Sum % Constant
7988// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7989//
7990// For division, we can compute the remainder using the algorithm described
7991// above, subtract it from the dividend to get an exact multiple of Constant.
7992// Then multiply that exact multiple by the multiplicative inverse modulo
7993// (1 << (BitWidth / 2)) to get the quotient.
7994
7995// If Constant is even, we can shift right the dividend and the divisor by the
7996// number of trailing zeros in Constant before applying the remainder algorithm.
7997// If we're after the quotient, we can subtract this value from the shifted
7998// dividend and multiply by the multiplicative inverse of the shifted divisor.
7999// If we want the remainder, we shift the value left by the number of trailing
8000// zeros and add the bits that were shifted out of the dividend.
8003 EVT HiLoVT, SelectionDAG &DAG,
8004 SDValue LL, SDValue LH) const {
8005 unsigned Opcode = N->getOpcode();
8006 EVT VT = N->getValueType(0);
8007
8008 // TODO: Support signed division/remainder.
8009 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8010 return false;
8011 assert(
8012 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8013 "Unexpected opcode");
8014
8015 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8016 if (!CN)
8017 return false;
8018
8019 APInt Divisor = CN->getAPIntValue();
8020 unsigned BitWidth = Divisor.getBitWidth();
8021 unsigned HBitWidth = BitWidth / 2;
8023 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8024
8025 // Divisor needs to less than (1 << HBitWidth).
8026 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8027 if (Divisor.uge(HalfMaxPlus1))
8028 return false;
8029
8030 // We depend on the UREM by constant optimization in DAGCombiner that requires
8031 // high multiply.
8032 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8034 return false;
8035
8036 // Don't expand if optimizing for size.
8037 if (DAG.shouldOptForSize())
8038 return false;
8039
8040 // Early out for 0 or 1 divisors.
8041 if (Divisor.ule(1))
8042 return false;
8043
8044 // If the divisor is even, shift it until it becomes odd.
8045 unsigned TrailingZeros = 0;
8046 if (!Divisor[0]) {
8047 TrailingZeros = Divisor.countr_zero();
8048 Divisor.lshrInPlace(TrailingZeros);
8049 }
8050
8051 SDLoc dl(N);
8052 SDValue Sum;
8053 SDValue PartialRem;
8054
8055 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8056 // then add in the carry.
8057 // TODO: If we can't split it in half, we might be able to split into 3 or
8058 // more pieces using a smaller bit width.
8059 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8060 assert(!LL == !LH && "Expected both input halves or no input halves!");
8061 if (!LL)
8062 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8063
8064 // Shift the input by the number of TrailingZeros in the divisor. The
8065 // shifted out bits will be added to the remainder later.
8066 if (TrailingZeros) {
8067 // Save the shifted off bits if we need the remainder.
8068 if (Opcode != ISD::UDIV) {
8069 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8070 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8071 DAG.getConstant(Mask, dl, HiLoVT));
8072 }
8073
8074 LL = DAG.getNode(
8075 ISD::OR, dl, HiLoVT,
8076 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8077 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8078 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8079 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8080 HiLoVT, dl)));
8081 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8082 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8083 }
8084
8085 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8086 EVT SetCCType =
8087 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8089 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8090 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8091 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8092 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8093 } else {
8094 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8095 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8096 // If the boolean for the target is 0 or 1, we can add the setcc result
8097 // directly.
8098 if (getBooleanContents(HiLoVT) ==
8100 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8101 else
8102 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8103 DAG.getConstant(0, dl, HiLoVT));
8104 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8105 }
8106 }
8107
8108 // If we didn't find a sum, we can't do the expansion.
8109 if (!Sum)
8110 return false;
8111
8112 // Perform a HiLoVT urem on the Sum using truncated divisor.
8113 SDValue RemL =
8114 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8115 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8116 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8117
8118 if (Opcode != ISD::UREM) {
8119 // Subtract the remainder from the shifted dividend.
8120 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8121 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8122
8123 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8124
8125 // Multiply by the multiplicative inverse of the divisor modulo
8126 // (1 << BitWidth).
8127 APInt MulFactor = Divisor.multiplicativeInverse();
8128
8129 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8130 DAG.getConstant(MulFactor, dl, VT));
8131
8132 // Split the quotient into low and high parts.
8133 SDValue QuotL, QuotH;
8134 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8135 Result.push_back(QuotL);
8136 Result.push_back(QuotH);
8137 }
8138
8139 if (Opcode != ISD::UDIV) {
8140 // If we shifted the input, shift the remainder left and add the bits we
8141 // shifted off the input.
8142 if (TrailingZeros) {
8143 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8144 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8145 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8146 }
8147 Result.push_back(RemL);
8148 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8149 }
8150
8151 return true;
8152}
8153
8154// Check that (every element of) Z is undef or not an exact multiple of BW.
8155static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8157 Z,
8158 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8159 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8160}
8161
8163 EVT VT = Node->getValueType(0);
8164 SDValue ShX, ShY;
8165 SDValue ShAmt, InvShAmt;
8166 SDValue X = Node->getOperand(0);
8167 SDValue Y = Node->getOperand(1);
8168 SDValue Z = Node->getOperand(2);
8169 SDValue Mask = Node->getOperand(3);
8170 SDValue VL = Node->getOperand(4);
8171
8172 unsigned BW = VT.getScalarSizeInBits();
8173 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8174 SDLoc DL(SDValue(Node, 0));
8175
8176 EVT ShVT = Z.getValueType();
8177 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8178 // fshl: X << C | Y >> (BW - C)
8179 // fshr: X << (BW - C) | Y >> C
8180 // where C = Z % BW is not zero
8181 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8182 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8183 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8184 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8185 VL);
8186 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8187 VL);
8188 } else {
8189 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8190 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8191 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8192 if (isPowerOf2_32(BW)) {
8193 // Z % BW -> Z & (BW - 1)
8194 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8195 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8196 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8197 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8198 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8199 } else {
8200 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8201 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8202 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8203 }
8204
8205 SDValue One = DAG.getConstant(1, DL, ShVT);
8206 if (IsFSHL) {
8207 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8208 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8209 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8210 } else {
8211 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8212 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8213 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8214 }
8215 }
8216 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8217}
8218
8220 SelectionDAG &DAG) const {
8221 if (Node->isVPOpcode())
8222 return expandVPFunnelShift(Node, DAG);
8223
8224 EVT VT = Node->getValueType(0);
8225
8226 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8230 return SDValue();
8231
8232 SDValue X = Node->getOperand(0);
8233 SDValue Y = Node->getOperand(1);
8234 SDValue Z = Node->getOperand(2);
8235
8236 unsigned BW = VT.getScalarSizeInBits();
8237 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8238 SDLoc DL(SDValue(Node, 0));
8239
8240 EVT ShVT = Z.getValueType();
8241
8242 // If a funnel shift in the other direction is more supported, use it.
8243 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8244 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8245 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8246 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8247 // fshl X, Y, Z -> fshr X, Y, -Z
8248 // fshr X, Y, Z -> fshl X, Y, -Z
8249 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8250 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8251 } else {
8252 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8253 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8254 SDValue One = DAG.getConstant(1, DL, ShVT);
8255 if (IsFSHL) {
8256 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8257 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8258 } else {
8259 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8260 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8261 }
8262 Z = DAG.getNOT(DL, Z, ShVT);
8263 }
8264 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8265 }
8266
8267 SDValue ShX, ShY;
8268 SDValue ShAmt, InvShAmt;
8269 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8270 // fshl: X << C | Y >> (BW - C)
8271 // fshr: X << (BW - C) | Y >> C
8272 // where C = Z % BW is not zero
8273 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8274 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8275 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8276 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8277 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8278 } else {
8279 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8280 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8281 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8282 if (isPowerOf2_32(BW)) {
8283 // Z % BW -> Z & (BW - 1)
8284 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8285 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8286 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8287 } else {
8288 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8289 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8290 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8291 }
8292
8293 SDValue One = DAG.getConstant(1, DL, ShVT);
8294 if (IsFSHL) {
8295 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8296 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8297 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8298 } else {
8299 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8300 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8301 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8302 }
8303 }
8304 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8305}
8306
8307// TODO: Merge with expandFunnelShift.
8309 SelectionDAG &DAG) const {
8310 EVT VT = Node->getValueType(0);
8311 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8312 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8313 SDValue Op0 = Node->getOperand(0);
8314 SDValue Op1 = Node->getOperand(1);
8315 SDLoc DL(SDValue(Node, 0));
8316
8317 EVT ShVT = Op1.getValueType();
8318 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8319
8320 // If a rotate in the other direction is more supported, use it.
8321 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8322 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8323 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8324 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8325 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8326 }
8327
8328 if (!AllowVectorOps && VT.isVector() &&
8334 return SDValue();
8335
8336 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8337 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8338 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8339 SDValue ShVal;
8340 SDValue HsVal;
8341 if (isPowerOf2_32(EltSizeInBits)) {
8342 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8343 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8344 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8345 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8346 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8347 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8348 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8349 } else {
8350 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8351 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8352 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8353 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8354 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8355 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8356 SDValue One = DAG.getConstant(1, DL, ShVT);
8357 HsVal =
8358 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8359 }
8360 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8361}
8362
8364 SelectionDAG &DAG) const {
8365 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8366 EVT VT = Node->getValueType(0);
8367 unsigned VTBits = VT.getScalarSizeInBits();
8368 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8369
8370 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8371 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8372 SDValue ShOpLo = Node->getOperand(0);
8373 SDValue ShOpHi = Node->getOperand(1);
8374 SDValue ShAmt = Node->getOperand(2);
8375 EVT ShAmtVT = ShAmt.getValueType();
8376 EVT ShAmtCCVT =
8377 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8378 SDLoc dl(Node);
8379
8380 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8381 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8382 // away during isel.
8383 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8384 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8385 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8386 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8387 : DAG.getConstant(0, dl, VT);
8388
8389 SDValue Tmp2, Tmp3;
8390 if (IsSHL) {
8391 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8392 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8393 } else {
8394 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8395 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8396 }
8397
8398 // If the shift amount is larger or equal than the width of a part we don't
8399 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8400 // values for large shift amounts.
8401 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8402 DAG.getConstant(VTBits, dl, ShAmtVT));
8403 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8404 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8405
8406 if (IsSHL) {
8407 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8408 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8409 } else {
8410 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8411 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8412 }
8413}
8414
8416 SelectionDAG &DAG) const {
8417 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8418 SDValue Src = Node->getOperand(OpNo);
8419 EVT SrcVT = Src.getValueType();
8420 EVT DstVT = Node->getValueType(0);
8421 SDLoc dl(SDValue(Node, 0));
8422
8423 // FIXME: Only f32 to i64 conversions are supported.
8424 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8425 return false;
8426
8427 if (Node->isStrictFPOpcode())
8428 // When a NaN is converted to an integer a trap is allowed. We can't
8429 // use this expansion here because it would eliminate that trap. Other
8430 // traps are also allowed and cannot be eliminated. See
8431 // IEEE 754-2008 sec 5.8.
8432 return false;
8433
8434 // Expand f32 -> i64 conversion
8435 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8436 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8437 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8438 EVT IntVT = SrcVT.changeTypeToInteger();
8439 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8440
8441 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8442 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8443 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8444 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8445 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8446 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8447
8448 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8449
8450 SDValue ExponentBits = DAG.getNode(
8451 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8452 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8453 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8454
8455 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8456 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8457 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8458 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8459
8460 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8461 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8462 DAG.getConstant(0x00800000, dl, IntVT));
8463
8464 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8465
8466 R = DAG.getSelectCC(
8467 dl, Exponent, ExponentLoBit,
8468 DAG.getNode(ISD::SHL, dl, DstVT, R,
8469 DAG.getZExtOrTrunc(
8470 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8471 dl, IntShVT)),
8472 DAG.getNode(ISD::SRL, dl, DstVT, R,
8473 DAG.getZExtOrTrunc(
8474 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8475 dl, IntShVT)),
8476 ISD::SETGT);
8477
8478 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8479 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8480
8481 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8482 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8483 return true;
8484}
8485
8487 SDValue &Chain,
8488 SelectionDAG &DAG) const {
8489 SDLoc dl(SDValue(Node, 0));
8490 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8491 SDValue Src = Node->getOperand(OpNo);
8492
8493 EVT SrcVT = Src.getValueType();
8494 EVT DstVT = Node->getValueType(0);
8495 EVT SetCCVT =
8496 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8497 EVT DstSetCCVT =
8498 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8499
8500 // Only expand vector types if we have the appropriate vector bit operations.
8501 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8503 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8505 return false;
8506
8507 // If the maximum float value is smaller then the signed integer range,
8508 // the destination signmask can't be represented by the float, so we can
8509 // just use FP_TO_SINT directly.
8510 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8511 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8512 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8514 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8515 if (Node->isStrictFPOpcode()) {
8516 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8517 { Node->getOperand(0), Src });
8518 Chain = Result.getValue(1);
8519 } else
8520 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8521 return true;
8522 }
8523
8524 // Don't expand it if there isn't cheap fsub instruction.
8526 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8527 return false;
8528
8529 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8530 SDValue Sel;
8531
8532 if (Node->isStrictFPOpcode()) {
8533 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8534 Node->getOperand(0), /*IsSignaling*/ true);
8535 Chain = Sel.getValue(1);
8536 } else {
8537 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8538 }
8539
8540 bool Strict = Node->isStrictFPOpcode() ||
8541 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8542
8543 if (Strict) {
8544 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8545 // signmask then offset (the result of which should be fully representable).
8546 // Sel = Src < 0x8000000000000000
8547 // FltOfs = select Sel, 0, 0x8000000000000000
8548 // IntOfs = select Sel, 0, 0x8000000000000000
8549 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8550
8551 // TODO: Should any fast-math-flags be set for the FSUB?
8552 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8553 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8554 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8555 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8556 DAG.getConstant(0, dl, DstVT),
8557 DAG.getConstant(SignMask, dl, DstVT));
8558 SDValue SInt;
8559 if (Node->isStrictFPOpcode()) {
8560 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8561 { Chain, Src, FltOfs });
8562 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8563 { Val.getValue(1), Val });
8564 Chain = SInt.getValue(1);
8565 } else {
8566 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8567 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8568 }
8569 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8570 } else {
8571 // Expand based on maximum range of FP_TO_SINT:
8572 // True = fp_to_sint(Src)
8573 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8574 // Result = select (Src < 0x8000000000000000), True, False
8575
8576 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8577 // TODO: Should any fast-math-flags be set for the FSUB?
8578 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8579 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8580 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8581 DAG.getConstant(SignMask, dl, DstVT));
8582 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8583 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8584 }
8585 return true;
8586}
8587
8589 SDValue &Chain, SelectionDAG &DAG) const {
8590 // This transform is not correct for converting 0 when rounding mode is set
8591 // to round toward negative infinity which will produce -0.0. So disable
8592 // under strictfp.
8593 if (Node->isStrictFPOpcode())
8594 return false;
8595
8596 SDValue Src = Node->getOperand(0);
8597 EVT SrcVT = Src.getValueType();
8598 EVT DstVT = Node->getValueType(0);
8599
8600 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8601 // it.
8602 if (Node->getFlags().hasNonNeg() &&
8604 Result =
8605 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8606 return true;
8607 }
8608
8609 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8610 return false;
8611
8612 // Only expand vector types if we have the appropriate vector bit
8613 // operations.
8614 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8619 return false;
8620
8621 SDLoc dl(SDValue(Node, 0));
8622
8623 // Implementation of unsigned i64 to f64 following the algorithm in
8624 // __floatundidf in compiler_rt. This implementation performs rounding
8625 // correctly in all rounding modes with the exception of converting 0
8626 // when rounding toward negative infinity. In that case the fsub will
8627 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8628 // incorrect.
8629 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8630 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8631 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8632 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8633 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8634 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8635
8636 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8637 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8638 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8639 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8640 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8641 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8642 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8643 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8644 return true;
8645}
8646
8647SDValue
8649 SelectionDAG &DAG) const {
8650 unsigned Opcode = Node->getOpcode();
8651 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8652 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8653 "Wrong opcode");
8654
8655 if (Node->getFlags().hasNoNaNs()) {
8656 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8657 EVT VT = Node->getValueType(0);
8658 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8660 VT.isVector())
8661 return SDValue();
8662 SDValue Op1 = Node->getOperand(0);
8663 SDValue Op2 = Node->getOperand(1);
8664 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8665 Node->getFlags());
8666 }
8667
8668 return SDValue();
8669}
8670
8672 SelectionDAG &DAG) const {
8673 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8674 return Expanded;
8675
8676 EVT VT = Node->getValueType(0);
8677 if (VT.isScalableVector())
8679 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8680
8681 SDLoc dl(Node);
8682 unsigned NewOp =
8683 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8684
8685 if (isOperationLegalOrCustom(NewOp, VT)) {
8686 SDValue Quiet0 = Node->getOperand(0);
8687 SDValue Quiet1 = Node->getOperand(1);
8688
8689 if (!Node->getFlags().hasNoNaNs()) {
8690 // Insert canonicalizes if it's possible we need to quiet to get correct
8691 // sNaN behavior.
8692 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8693 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8694 Node->getFlags());
8695 }
8696 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8697 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8698 Node->getFlags());
8699 }
8700 }
8701
8702 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8703 }
8704
8705 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8706 // instead if there are no NaNs and there can't be an incompatible zero
8707 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8708 if ((Node->getFlags().hasNoNaNs() ||
8709 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8710 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8711 (Node->getFlags().hasNoSignedZeros() ||
8712 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8713 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8714 unsigned IEEE2018Op =
8715 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8716 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8717 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8718 Node->getOperand(1), Node->getFlags());
8719 }
8720
8722 return SelCC;
8723
8724 return SDValue();
8725}
8726
8728 SelectionDAG &DAG) const {
8729 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8730 return Expanded;
8731
8732 SDLoc DL(N);
8733 SDValue LHS = N->getOperand(0);
8734 SDValue RHS = N->getOperand(1);
8735 unsigned Opc = N->getOpcode();
8736 EVT VT = N->getValueType(0);
8737 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8738 bool IsMax = Opc == ISD::FMAXIMUM;
8739 SDNodeFlags Flags = N->getFlags();
8740
8741 // First, implement comparison not propagating NaN. If no native fmin or fmax
8742 // available, use plain select with setcc instead.
8744 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8745 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8746
8747 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8748 // signed zero behavior.
8749 bool MinMaxMustRespectOrderedZero = false;
8750
8751 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8752 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8753 MinMaxMustRespectOrderedZero = true;
8754 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8755 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8756 } else {
8758 return DAG.UnrollVectorOp(N);
8759
8760 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8761 SDValue Compare =
8762 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8763 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8764 }
8765
8766 // Propagate any NaN of both operands
8767 if (!N->getFlags().hasNoNaNs() &&
8768 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8769 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8771 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8772 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8773 }
8774
8775 // fminimum/fmaximum requires -0.0 less than +0.0
8776 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8777 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8778 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8779 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8780 SDValue TestZero =
8781 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8782 SDValue LCmp = DAG.getSelect(
8783 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8784 MinMax, Flags);
8785 SDValue RCmp = DAG.getSelect(
8786 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8787 LCmp, Flags);
8788 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8789 }
8790
8791 return MinMax;
8792}
8793
8795 SelectionDAG &DAG) const {
8796 SDLoc DL(Node);
8797 SDValue LHS = Node->getOperand(0);
8798 SDValue RHS = Node->getOperand(1);
8799 unsigned Opc = Node->getOpcode();
8800 EVT VT = Node->getValueType(0);
8801 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8802 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8803 const TargetOptions &Options = DAG.getTarget().Options;
8804 SDNodeFlags Flags = Node->getFlags();
8805
8806 unsigned NewOp =
8807 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8808
8809 if (isOperationLegalOrCustom(NewOp, VT)) {
8810 if (!Flags.hasNoNaNs()) {
8811 // Insert canonicalizes if it's possible we need to quiet to get correct
8812 // sNaN behavior.
8813 if (!DAG.isKnownNeverSNaN(LHS)) {
8814 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8815 }
8816 if (!DAG.isKnownNeverSNaN(RHS)) {
8817 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8818 }
8819 }
8820
8821 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8822 }
8823
8824 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8825 // same behaviors for all of other cases: +0.0 vs -0.0 included.
8826 if (Flags.hasNoNaNs() ||
8827 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8828 unsigned IEEE2019Op =
8829 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8830 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8831 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8832 }
8833
8834 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8835 // either one for +0.0 vs -0.0.
8836 if ((Flags.hasNoNaNs() ||
8837 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8838 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8839 DAG.isKnownNeverZeroFloat(RHS))) {
8840 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8841 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8842 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8843 }
8844
8846 return DAG.UnrollVectorOp(Node);
8847
8848 // If only one operand is NaN, override it with another operand.
8849 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8850 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8851 }
8852 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8853 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8854 }
8855
8856 SDValue MinMax =
8857 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8858
8859 // TODO: We need quiet sNaN if strictfp.
8860
8861 // Fixup signed zero behavior.
8862 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8863 DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
8864 return MinMax;
8865 }
8866 SDValue TestZero =
8867 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8868 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8869 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8870 SDValue LCmp = DAG.getSelect(
8871 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8872 MinMax, Flags);
8873 SDValue RCmp = DAG.getSelect(
8874 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8875 Flags);
8876 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8877}
8878
8879/// Returns a true value if if this FPClassTest can be performed with an ordered
8880/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8881/// std::nullopt if it cannot be performed as a compare with 0.
8882static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8883 const fltSemantics &Semantics,
8884 const MachineFunction &MF) {
8885 FPClassTest OrderedMask = Test & ~fcNan;
8886 FPClassTest NanTest = Test & fcNan;
8887 bool IsOrdered = NanTest == fcNone;
8888 bool IsUnordered = NanTest == fcNan;
8889
8890 // Skip cases that are testing for only a qnan or snan.
8891 if (!IsOrdered && !IsUnordered)
8892 return std::nullopt;
8893
8894 if (OrderedMask == fcZero &&
8895 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8896 return IsOrdered;
8897 if (OrderedMask == (fcZero | fcSubnormal) &&
8898 MF.getDenormalMode(Semantics).inputsAreZero())
8899 return IsOrdered;
8900 return std::nullopt;
8901}
8902
8904 const FPClassTest OrigTestMask,
8905 SDNodeFlags Flags, const SDLoc &DL,
8906 SelectionDAG &DAG) const {
8907 EVT OperandVT = Op.getValueType();
8908 assert(OperandVT.isFloatingPoint());
8909 FPClassTest Test = OrigTestMask;
8910
8911 // Degenerated cases.
8912 if (Test == fcNone)
8913 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8914 if (Test == fcAllFlags)
8915 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8916
8917 // PPC double double is a pair of doubles, of which the higher part determines
8918 // the value class.
8919 if (OperandVT == MVT::ppcf128) {
8920 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8921 DAG.getConstant(1, DL, MVT::i32));
8922 OperandVT = MVT::f64;
8923 }
8924
8925 // Floating-point type properties.
8926 EVT ScalarFloatVT = OperandVT.getScalarType();
8927 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8928 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8929 bool IsF80 = (ScalarFloatVT == MVT::f80);
8930
8931 // Some checks can be implemented using float comparisons, if floating point
8932 // exceptions are ignored.
8933 if (Flags.hasNoFPExcept() &&
8935 FPClassTest FPTestMask = Test;
8936 bool IsInvertedFP = false;
8937
8938 if (FPClassTest InvertedFPCheck =
8939 invertFPClassTestIfSimpler(FPTestMask, true)) {
8940 FPTestMask = InvertedFPCheck;
8941 IsInvertedFP = true;
8942 }
8943
8944 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8945 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8946
8947 // See if we can fold an | fcNan into an unordered compare.
8948 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8949
8950 // Can't fold the ordered check if we're only testing for snan or qnan
8951 // individually.
8952 if ((FPTestMask & fcNan) != fcNan)
8953 OrderedFPTestMask = FPTestMask;
8954
8955 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8956
8957 if (std::optional<bool> IsCmp0 =
8958 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8959 IsCmp0 && (isCondCodeLegalOrCustom(
8960 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8961 OperandVT.getScalarType().getSimpleVT()))) {
8962
8963 // If denormals could be implicitly treated as 0, this is not equivalent
8964 // to a compare with 0 since it will also be true for denormals.
8965 return DAG.getSetCC(DL, ResultVT, Op,
8966 DAG.getConstantFP(0.0, DL, OperandVT),
8967 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8968 }
8969
8970 if (FPTestMask == fcNan &&
8972 OperandVT.getScalarType().getSimpleVT()))
8973 return DAG.getSetCC(DL, ResultVT, Op, Op,
8974 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8975
8976 bool IsOrderedInf = FPTestMask == fcInf;
8977 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8978 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8979 : UnorderedCmpOpcode,
8980 OperandVT.getScalarType().getSimpleVT()) &&
8981 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8983 (OperandVT.isVector() &&
8985 // isinf(x) --> fabs(x) == inf
8986 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8987 SDValue Inf =
8988 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8989 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8990 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8991 }
8992
8993 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8994 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8995 : UnorderedCmpOpcode,
8996 OperandVT.getSimpleVT())) {
8997 // isposinf(x) --> x == inf
8998 // isneginf(x) --> x == -inf
8999 // isposinf(x) || nan --> x u== inf
9000 // isneginf(x) || nan --> x u== -inf
9001
9002 SDValue Inf = DAG.getConstantFP(
9003 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9004 OperandVT);
9005 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9006 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9007 }
9008
9009 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9010 // TODO: Could handle ordered case, but it produces worse code for
9011 // x86. Maybe handle ordered if fabs is free?
9012
9013 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9014 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9015
9016 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9017 OperandVT.getScalarType().getSimpleVT())) {
9018 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9019
9020 // TODO: Maybe only makes sense if fabs is free. Integer test of
9021 // exponent bits seems better for x86.
9022 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9023 SDValue SmallestNormal = DAG.getConstantFP(
9024 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9025 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9026 IsOrdered ? OrderedOp : UnorderedOp);
9027 }
9028 }
9029
9030 if (FPTestMask == fcNormal) {
9031 // TODO: Handle unordered
9032 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9033 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9034
9035 if (isCondCodeLegalOrCustom(IsFiniteOp,
9036 OperandVT.getScalarType().getSimpleVT()) &&
9037 isCondCodeLegalOrCustom(IsNormalOp,
9038 OperandVT.getScalarType().getSimpleVT()) &&
9039 isFAbsFree(OperandVT)) {
9040 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9041 SDValue Inf =
9042 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9043 SDValue SmallestNormal = DAG.getConstantFP(
9044 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9045
9046 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9047 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9048 SDValue IsNormal =
9049 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9050 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9051 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9052 }
9053 }
9054 }
9055
9056 // Some checks may be represented as inversion of simpler check, for example
9057 // "inf|normal|subnormal|zero" => !"nan".
9058 bool IsInverted = false;
9059
9060 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9061 Test = InvertedCheck;
9062 IsInverted = true;
9063 }
9064
9065 // In the general case use integer operations.
9066 unsigned BitSize = OperandVT.getScalarSizeInBits();
9067 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9068 if (OperandVT.isVector())
9069 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9070 OperandVT.getVectorElementCount());
9071 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9072
9073 // Various masks.
9074 APInt SignBit = APInt::getSignMask(BitSize);
9075 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9076 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9077 const unsigned ExplicitIntBitInF80 = 63;
9078 APInt ExpMask = Inf;
9079 if (IsF80)
9080 ExpMask.clearBit(ExplicitIntBitInF80);
9081 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9082 APInt QNaNBitMask =
9083 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9084 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9085
9086 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9087 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9088 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9089 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9090 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9091 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9092
9093 SDValue Res;
9094 const auto appendResult = [&](SDValue PartialRes) {
9095 if (PartialRes) {
9096 if (Res)
9097 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9098 else
9099 Res = PartialRes;
9100 }
9101 };
9102
9103 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9104 const auto getIntBitIsSet = [&]() -> SDValue {
9105 if (!IntBitIsSetV) {
9106 APInt IntBitMask(BitSize, 0);
9107 IntBitMask.setBit(ExplicitIntBitInF80);
9108 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9109 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9110 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9111 }
9112 return IntBitIsSetV;
9113 };
9114
9115 // Split the value into sign bit and absolute value.
9116 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9117 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9118 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9119
9120 // Tests that involve more than one class should be processed first.
9121 SDValue PartialRes;
9122
9123 if (IsF80)
9124 ; // Detect finite numbers of f80 by checking individual classes because
9125 // they have different settings of the explicit integer bit.
9126 else if ((Test & fcFinite) == fcFinite) {
9127 // finite(V) ==> abs(V) < exp_mask
9128 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9129 Test &= ~fcFinite;
9130 } else if ((Test & fcFinite) == fcPosFinite) {
9131 // finite(V) && V > 0 ==> V < exp_mask
9132 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9133 Test &= ~fcPosFinite;
9134 } else if ((Test & fcFinite) == fcNegFinite) {
9135 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9136 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9137 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9138 Test &= ~fcNegFinite;
9139 }
9140 appendResult(PartialRes);
9141
9142 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9143 // fcZero | fcSubnormal => test all exponent bits are 0
9144 // TODO: Handle sign bit specific cases
9145 if (PartialCheck == (fcZero | fcSubnormal)) {
9146 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9147 SDValue ExpIsZero =
9148 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9149 appendResult(ExpIsZero);
9150 Test &= ~PartialCheck & fcAllFlags;
9151 }
9152 }
9153
9154 // Check for individual classes.
9155
9156 if (unsigned PartialCheck = Test & fcZero) {
9157 if (PartialCheck == fcPosZero)
9158 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9159 else if (PartialCheck == fcZero)
9160 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9161 else // ISD::fcNegZero
9162 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9163 appendResult(PartialRes);
9164 }
9165
9166 if (unsigned PartialCheck = Test & fcSubnormal) {
9167 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9168 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9169 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9170 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9171 SDValue VMinusOneV =
9172 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9173 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9174 if (PartialCheck == fcNegSubnormal)
9175 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9176 appendResult(PartialRes);
9177 }
9178
9179 if (unsigned PartialCheck = Test & fcInf) {
9180 if (PartialCheck == fcPosInf)
9181 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9182 else if (PartialCheck == fcInf)
9183 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9184 else { // ISD::fcNegInf
9185 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9186 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9187 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9188 }
9189 appendResult(PartialRes);
9190 }
9191
9192 if (unsigned PartialCheck = Test & fcNan) {
9193 APInt InfWithQnanBit = Inf | QNaNBitMask;
9194 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9195 if (PartialCheck == fcNan) {
9196 // isnan(V) ==> abs(V) > int(inf)
9197 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9198 if (IsF80) {
9199 // Recognize unsupported values as NaNs for compatibility with glibc.
9200 // In them (exp(V)==0) == int_bit.
9201 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9202 SDValue ExpIsZero =
9203 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9204 SDValue IsPseudo =
9205 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9206 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9207 }
9208 } else if (PartialCheck == fcQNan) {
9209 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9210 PartialRes =
9211 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9212 } else { // ISD::fcSNan
9213 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9214 // abs(V) < (unsigned(Inf) | quiet_bit)
9215 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9216 SDValue IsNotQnan =
9217 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9218 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9219 }
9220 appendResult(PartialRes);
9221 }
9222
9223 if (unsigned PartialCheck = Test & fcNormal) {
9224 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9225 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9226 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9227 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9228 APInt ExpLimit = ExpMask - ExpLSB;
9229 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9230 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9231 if (PartialCheck == fcNegNormal)
9232 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9233 else if (PartialCheck == fcPosNormal) {
9234 SDValue PosSignV =
9235 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9236 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9237 }
9238 if (IsF80)
9239 PartialRes =
9240 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9241 appendResult(PartialRes);
9242 }
9243
9244 if (!Res)
9245 return DAG.getConstant(IsInverted, DL, ResultVT);
9246 if (IsInverted)
9247 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9248 return Res;
9249}
9250
9251// Only expand vector types if we have the appropriate vector bit operations.
9252static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9253 assert(VT.isVector() && "Expected vector type");
9254 unsigned Len = VT.getScalarSizeInBits();
9255 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9258 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9260}
9261
9263 SDLoc dl(Node);
9264 EVT VT = Node->getValueType(0);
9265 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9266 SDValue Op = Node->getOperand(0);
9267 unsigned Len = VT.getScalarSizeInBits();
9268 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9269
9270 // TODO: Add support for irregular type lengths.
9271 if (!(Len <= 128 && Len % 8 == 0))
9272 return SDValue();
9273
9274 // Only expand vector types if we have the appropriate vector bit operations.
9275 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9276 return SDValue();
9277
9278 // This is the "best" algorithm from
9279 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9280 SDValue Mask55 =
9281 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9282 SDValue Mask33 =
9283 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9284 SDValue Mask0F =
9285 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9286
9287 // v = v - ((v >> 1) & 0x55555555...)
9288 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9289 DAG.getNode(ISD::AND, dl, VT,
9290 DAG.getNode(ISD::SRL, dl, VT, Op,
9291 DAG.getConstant(1, dl, ShVT)),
9292 Mask55));
9293 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9294 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9295 DAG.getNode(ISD::AND, dl, VT,
9296 DAG.getNode(ISD::SRL, dl, VT, Op,
9297 DAG.getConstant(2, dl, ShVT)),
9298 Mask33));
9299 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9300 Op = DAG.getNode(ISD::AND, dl, VT,
9301 DAG.getNode(ISD::ADD, dl, VT, Op,
9302 DAG.getNode(ISD::SRL, dl, VT, Op,
9303 DAG.getConstant(4, dl, ShVT))),
9304 Mask0F);
9305
9306 if (Len <= 8)
9307 return Op;
9308
9309 // Avoid the multiply if we only have 2 bytes to add.
9310 // TODO: Only doing this for scalars because vectors weren't as obviously
9311 // improved.
9312 if (Len == 16 && !VT.isVector()) {
9313 // v = (v + (v >> 8)) & 0x00FF;
9314 return DAG.getNode(ISD::AND, dl, VT,
9315 DAG.getNode(ISD::ADD, dl, VT, Op,
9316 DAG.getNode(ISD::SRL, dl, VT, Op,
9317 DAG.getConstant(8, dl, ShVT))),
9318 DAG.getConstant(0xFF, dl, VT));
9319 }
9320
9321 // v = (v * 0x01010101...) >> (Len - 8)
9322 SDValue V;
9325 SDValue Mask01 =
9326 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9327 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9328 } else {
9329 V = Op;
9330 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9331 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9332 V = DAG.getNode(ISD::ADD, dl, VT, V,
9333 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9334 }
9335 }
9336 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9337}
9338
9340 SDLoc dl(Node);
9341 EVT VT = Node->getValueType(0);
9342 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9343 SDValue Op = Node->getOperand(0);
9344 SDValue Mask = Node->getOperand(1);
9345 SDValue VL = Node->getOperand(2);
9346 unsigned Len = VT.getScalarSizeInBits();
9347 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9348
9349 // TODO: Add support for irregular type lengths.
9350 if (!(Len <= 128 && Len % 8 == 0))
9351 return SDValue();
9352
9353 // This is same algorithm of expandCTPOP from
9354 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9355 SDValue Mask55 =
9356 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9357 SDValue Mask33 =
9358 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9359 SDValue Mask0F =
9360 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9361
9362 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9363
9364 // v = v - ((v >> 1) & 0x55555555...)
9365 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9366 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9367 DAG.getConstant(1, dl, ShVT), Mask, VL),
9368 Mask55, Mask, VL);
9369 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9370
9371 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9372 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9373 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9374 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9375 DAG.getConstant(2, dl, ShVT), Mask, VL),
9376 Mask33, Mask, VL);
9377 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9378
9379 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9380 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9381 Mask, VL),
9382 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9383 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9384
9385 if (Len <= 8)
9386 return Op;
9387
9388 // v = (v * 0x01010101...) >> (Len - 8)
9389 SDValue V;
9391 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9392 SDValue Mask01 =
9393 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9394 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9395 } else {
9396 V = Op;
9397 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9398 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9399 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9400 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9401 Mask, VL);
9402 }
9403 }
9404 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9405 Mask, VL);
9406}
9407
9409 SDLoc dl(Node);
9410 EVT VT = Node->getValueType(0);
9411 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9412 SDValue Op = Node->getOperand(0);
9413 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9414
9415 // If the non-ZERO_UNDEF version is supported we can use that instead.
9416 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9418 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9419
9420 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9422 EVT SetCCVT =
9423 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9424 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9425 SDValue Zero = DAG.getConstant(0, dl, VT);
9426 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9427 return DAG.getSelect(dl, VT, SrcIsZero,
9428 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9429 }
9430
9431 // Only expand vector types if we have the appropriate vector bit operations.
9432 // This includes the operations needed to expand CTPOP if it isn't supported.
9433 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9435 !canExpandVectorCTPOP(*this, VT)) ||
9438 return SDValue();
9439
9440 // for now, we do this:
9441 // x = x | (x >> 1);
9442 // x = x | (x >> 2);
9443 // ...
9444 // x = x | (x >>16);
9445 // x = x | (x >>32); // for 64-bit input
9446 // return popcount(~x);
9447 //
9448 // Ref: "Hacker's Delight" by Henry Warren
9449 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9450 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9451 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9452 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9453 }
9454 Op = DAG.getNOT(dl, Op, VT);
9455 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9456}
9457
9459 SDLoc dl(Node);
9460 EVT VT = Node->getValueType(0);
9461 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9462 SDValue Op = Node->getOperand(0);
9463 SDValue Mask = Node->getOperand(1);
9464 SDValue VL = Node->getOperand(2);
9465 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9466
9467 // do this:
9468 // x = x | (x >> 1);
9469 // x = x | (x >> 2);
9470 // ...
9471 // x = x | (x >>16);
9472 // x = x | (x >>32); // for 64-bit input
9473 // return popcount(~x);
9474 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9475 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9476 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9477 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9478 VL);
9479 }
9480 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9481 Mask, VL);
9482 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9483}
9484
9486 const SDLoc &DL, EVT VT, SDValue Op,
9487 unsigned BitWidth) const {
9488 if (BitWidth != 32 && BitWidth != 64)
9489 return SDValue();
9490 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9491 : APInt(64, 0x0218A392CD3D5DBFULL);
9492 const DataLayout &TD = DAG.getDataLayout();
9493 MachinePointerInfo PtrInfo =
9495 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9496 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9497 SDValue Lookup = DAG.getNode(
9498 ISD::SRL, DL, VT,
9499 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9500 DAG.getConstant(DeBruijn, DL, VT)),
9501 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9503
9505 for (unsigned i = 0; i < BitWidth; i++) {
9506 APInt Shl = DeBruijn.shl(i);
9507 APInt Lshr = Shl.lshr(ShiftAmt);
9508 Table[Lshr.getZExtValue()] = i;
9509 }
9510
9511 // Create a ConstantArray in Constant Pool
9512 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9513 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9514 TD.getPrefTypeAlign(CA->getType()));
9515 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9516 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9517 PtrInfo, MVT::i8);
9518 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9519 return ExtLoad;
9520
9521 EVT SetCCVT =
9522 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9523 SDValue Zero = DAG.getConstant(0, DL, VT);
9524 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9525 return DAG.getSelect(DL, VT, SrcIsZero,
9526 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9527}
9528
9530 SDLoc dl(Node);
9531 EVT VT = Node->getValueType(0);
9532 SDValue Op = Node->getOperand(0);
9533 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9534
9535 // If the non-ZERO_UNDEF version is supported we can use that instead.
9536 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9538 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9539
9540 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9542 EVT SetCCVT =
9543 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9544 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9545 SDValue Zero = DAG.getConstant(0, dl, VT);
9546 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9547 return DAG.getSelect(dl, VT, SrcIsZero,
9548 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9549 }
9550
9551 // Only expand vector types if we have the appropriate vector bit operations.
9552 // This includes the operations needed to expand CTPOP if it isn't supported.
9553 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9556 !canExpandVectorCTPOP(*this, VT)) ||
9560 return SDValue();
9561
9562 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9563 // to be expanded or converted to a libcall.
9566 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9567 return V;
9568
9569 // for now, we use: { return popcount(~x & (x - 1)); }
9570 // unless the target has ctlz but not ctpop, in which case we use:
9571 // { return 32 - nlz(~x & (x-1)); }
9572 // Ref: "Hacker's Delight" by Henry Warren
9573 SDValue Tmp = DAG.getNode(
9574 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9575 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9576
9577 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9579 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9580 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9581 }
9582
9583 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9584}
9585
9587 SDValue Op = Node->getOperand(0);
9588 SDValue Mask = Node->getOperand(1);
9589 SDValue VL = Node->getOperand(2);
9590 SDLoc dl(Node);
9591 EVT VT = Node->getValueType(0);
9592
9593 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9594 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9595 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9596 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9597 DAG.getConstant(1, dl, VT), Mask, VL);
9598 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9599 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9600}
9601
9603 SelectionDAG &DAG) const {
9604 // %cond = to_bool_vec %source
9605 // %splat = splat /*val=*/VL
9606 // %tz = step_vector
9607 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9608 // %r = vp.reduce.umin %v
9609 SDLoc DL(N);
9610 SDValue Source = N->getOperand(0);
9611 SDValue Mask = N->getOperand(1);
9612 SDValue EVL = N->getOperand(2);
9613 EVT SrcVT = Source.getValueType();
9614 EVT ResVT = N->getValueType(0);
9615 EVT ResVecVT =
9616 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9617
9618 // Convert to boolean vector.
9619 if (SrcVT.getScalarType() != MVT::i1) {
9620 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9621 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9622 SrcVT.getVectorElementCount());
9623 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9624 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9625 }
9626
9627 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9628 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9629 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9630 SDValue Select =
9631 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9632 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9633}
9634
9636 SelectionDAG &DAG) const {
9637 SDLoc DL(N);
9638 SDValue Mask = N->getOperand(0);
9639 EVT MaskVT = Mask.getValueType();
9640 EVT BoolVT = MaskVT.getScalarType();
9641
9642 // Find a suitable type for a stepvector.
9643 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9644 if (MaskVT.isScalableVector())
9645 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9646 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9647 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9648 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9649 /*ZeroIsPoison=*/true, &VScaleRange);
9650 EVT StepVT = MVT::getIntegerVT(EltWidth);
9651 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9652
9653 // If promotion is required to make the type legal, do it here; promotion
9654 // of integers within LegalizeVectorOps is looking for types of the same
9655 // size but with a smaller number of larger elements, not the usual larger
9656 // size with the same number of larger elements.
9657 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9659 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9660 StepVT = StepVecVT.getVectorElementType();
9661 }
9662
9663 // Zero out lanes with inactive elements, then find the highest remaining
9664 // value from the stepvector.
9665 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9666 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9667 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9668 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9669 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9670}
9671
9673 bool IsNegative) const {
9674 SDLoc dl(N);
9675 EVT VT = N->getValueType(0);
9676 SDValue Op = N->getOperand(0);
9677
9678 // abs(x) -> smax(x,sub(0,x))
9679 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9681 SDValue Zero = DAG.getConstant(0, dl, VT);
9682 Op = DAG.getFreeze(Op);
9683 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9684 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9685 }
9686
9687 // abs(x) -> umin(x,sub(0,x))
9688 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9690 SDValue Zero = DAG.getConstant(0, dl, VT);
9691 Op = DAG.getFreeze(Op);
9692 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9693 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9694 }
9695
9696 // 0 - abs(x) -> smin(x, sub(0,x))
9697 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9699 SDValue Zero = DAG.getConstant(0, dl, VT);
9700 Op = DAG.getFreeze(Op);
9701 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9702 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9703 }
9704
9705 // Only expand vector types if we have the appropriate vector operations.
9706 if (VT.isVector() &&
9708 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9709 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9711 return SDValue();
9712
9713 Op = DAG.getFreeze(Op);
9714 SDValue Shift = DAG.getNode(
9715 ISD::SRA, dl, VT, Op,
9716 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9717 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9718
9719 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9720 if (!IsNegative)
9721 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9722
9723 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9724 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9725}
9726
9728 SDLoc dl(N);
9729 EVT VT = N->getValueType(0);
9730 SDValue LHS = N->getOperand(0);
9731 SDValue RHS = N->getOperand(1);
9732 bool IsSigned = N->getOpcode() == ISD::ABDS;
9733
9734 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9735 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9736 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9737 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9738 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9739 LHS = DAG.getFreeze(LHS);
9740 RHS = DAG.getFreeze(RHS);
9741 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9742 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9743 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9744 }
9745
9746 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9747 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9748 LHS = DAG.getFreeze(LHS);
9749 RHS = DAG.getFreeze(RHS);
9750 return DAG.getNode(ISD::OR, dl, VT,
9751 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9752 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9753 }
9754
9755 // If the subtract doesn't overflow then just use abs(sub())
9756 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9757
9758 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9759 return DAG.getNode(ISD::ABS, dl, VT,
9760 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9761
9762 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9763 return DAG.getNode(ISD::ABS, dl, VT,
9764 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9765
9766 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9768 LHS = DAG.getFreeze(LHS);
9769 RHS = DAG.getFreeze(RHS);
9770 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9771
9772 // Branchless expansion iff cmp result is allbits:
9773 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9774 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9775 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9776 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9777 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9778 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9779 }
9780
9781 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9782 // flag if the (scalar) type is illegal as this is more likely to legalize
9783 // cleanly:
9784 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9785 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9786 SDValue USubO =
9787 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9788 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9789 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9790 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9791 }
9792
9793 // FIXME: Should really try to split the vector in case it's legal on a
9794 // subvector.
9796 return DAG.UnrollVectorOp(N);
9797
9798 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9799 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9800 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9801 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9802}
9803
9805 SDLoc dl(N);
9806 EVT VT = N->getValueType(0);
9807 SDValue LHS = N->getOperand(0);
9808 SDValue RHS = N->getOperand(1);
9809
9810 unsigned Opc = N->getOpcode();
9811 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9812 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9813 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9814 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9815 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9816 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9818 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9819 "Unknown AVG node");
9820
9821 // If the operands are already extended, we can add+shift.
9822 bool IsExt =
9823 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9824 DAG.ComputeNumSignBits(RHS) >= 2) ||
9825 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9826 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9827 if (IsExt) {
9828 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9829 if (!IsFloor)
9830 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9831 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9832 DAG.getShiftAmountConstant(1, VT, dl));
9833 }
9834
9835 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9836 if (VT.isScalarInteger()) {
9837 unsigned BW = VT.getScalarSizeInBits();
9838 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9839 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9840 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9841 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9842 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9843 if (!IsFloor)
9844 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9845 DAG.getConstant(1, dl, ExtVT));
9846 // Just use SRL as we will be truncating away the extended sign bits.
9847 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9848 DAG.getShiftAmountConstant(1, ExtVT, dl));
9849 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9850 }
9851 }
9852
9853 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9854 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9855 SDValue UAddWithOverflow =
9856 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9857
9858 SDValue Sum = UAddWithOverflow.getValue(0);
9859 SDValue Overflow = UAddWithOverflow.getValue(1);
9860
9861 // Right shift the sum by 1
9862 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9863 DAG.getShiftAmountConstant(1, VT, dl));
9864
9865 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9866 SDValue OverflowShl = DAG.getNode(
9867 ISD::SHL, dl, VT, ZeroExtOverflow,
9868 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9869
9870 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9871 }
9872
9873 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9874 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9875 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9876 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9877 LHS = DAG.getFreeze(LHS);
9878 RHS = DAG.getFreeze(RHS);
9879 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9880 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9881 SDValue Shift =
9882 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9883 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9884}
9885
9887 SDLoc dl(N);
9888 EVT VT = N->getValueType(0);
9889 SDValue Op = N->getOperand(0);
9890
9891 if (!VT.isSimple())
9892 return SDValue();
9893
9894 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9895 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9896 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9897 default:
9898 return SDValue();
9899 case MVT::i16:
9900 // Use a rotate by 8. This can be further expanded if necessary.
9901 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9902 case MVT::i32:
9903 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9904 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9905 DAG.getConstant(0xFF00, dl, VT));
9906 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9907 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9908 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9909 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9910 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9911 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9912 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9913 case MVT::i64:
9914 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9915 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9916 DAG.getConstant(255ULL<<8, dl, VT));
9917 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9918 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9919 DAG.getConstant(255ULL<<16, dl, VT));
9920 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9921 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9922 DAG.getConstant(255ULL<<24, dl, VT));
9923 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9924 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9925 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9926 DAG.getConstant(255ULL<<24, dl, VT));
9927 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9928 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9929 DAG.getConstant(255ULL<<16, dl, VT));
9930 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9931 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9932 DAG.getConstant(255ULL<<8, dl, VT));
9933 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9934 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9935 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9936 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9937 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9938 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9939 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9940 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9941 }
9942}
9943
9945 SDLoc dl(N);
9946 EVT VT = N->getValueType(0);
9947 SDValue Op = N->getOperand(0);
9948 SDValue Mask = N->getOperand(1);
9949 SDValue EVL = N->getOperand(2);
9950
9951 if (!VT.isSimple())
9952 return SDValue();
9953
9954 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9955 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9956 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9957 default:
9958 return SDValue();
9959 case MVT::i16:
9960 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9961 Mask, EVL);
9962 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9963 Mask, EVL);
9964 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9965 case MVT::i32:
9966 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9967 Mask, EVL);
9968 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9969 Mask, EVL);
9970 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9971 Mask, EVL);
9972 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9973 Mask, EVL);
9974 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9975 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9976 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9977 Mask, EVL);
9978 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9979 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9980 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9981 case MVT::i64:
9982 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9983 Mask, EVL);
9984 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9985 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9986 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9987 Mask, EVL);
9988 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9989 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9990 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9991 Mask, EVL);
9992 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9993 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9994 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9995 Mask, EVL);
9996 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9997 Mask, EVL);
9998 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9999 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10000 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10001 Mask, EVL);
10002 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10003 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10004 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10005 Mask, EVL);
10006 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10007 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10008 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10009 Mask, EVL);
10010 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10011 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10012 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10013 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10014 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10015 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10016 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10017 }
10018}
10019
10021 SDLoc dl(N);
10022 EVT VT = N->getValueType(0);
10023 SDValue Op = N->getOperand(0);
10024 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10025 unsigned Sz = VT.getScalarSizeInBits();
10026
10027 SDValue Tmp, Tmp2, Tmp3;
10028
10029 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10030 // and finally the i1 pairs.
10031 // TODO: We can easily support i4/i2 legal types if any target ever does.
10032 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10033 // Create the masks - repeating the pattern every byte.
10034 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10035 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10036 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10037
10038 // BSWAP if the type is wider than a single byte.
10039 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10040
10041 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10042 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10043 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10044 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10045 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10046 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10047
10048 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10049 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10050 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10051 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10052 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10053 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10054
10055 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10056 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10057 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10058 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10059 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10060 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10061 return Tmp;
10062 }
10063
10064 Tmp = DAG.getConstant(0, dl, VT);
10065 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10066 if (I < J)
10067 Tmp2 =
10068 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10069 else
10070 Tmp2 =
10071 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10072
10073 APInt Shift = APInt::getOneBitSet(Sz, J);
10074 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10075 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10076 }
10077
10078 return Tmp;
10079}
10080
10082 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10083
10084 SDLoc dl(N);
10085 EVT VT = N->getValueType(0);
10086 SDValue Op = N->getOperand(0);
10087 SDValue Mask = N->getOperand(1);
10088 SDValue EVL = N->getOperand(2);
10089 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10090 unsigned Sz = VT.getScalarSizeInBits();
10091
10092 SDValue Tmp, Tmp2, Tmp3;
10093
10094 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10095 // and finally the i1 pairs.
10096 // TODO: We can easily support i4/i2 legal types if any target ever does.
10097 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10098 // Create the masks - repeating the pattern every byte.
10099 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10100 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10101 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10102
10103 // BSWAP if the type is wider than a single byte.
10104 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10105
10106 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10107 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10108 Mask, EVL);
10109 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10110 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10111 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10112 Mask, EVL);
10113 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10114 Mask, EVL);
10115 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10116
10117 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10118 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10119 Mask, EVL);
10120 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10121 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10122 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10123 Mask, EVL);
10124 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10125 Mask, EVL);
10126 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10127
10128 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10129 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10130 Mask, EVL);
10131 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10132 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10133 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10134 Mask, EVL);
10135 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10136 Mask, EVL);
10137 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10138 return Tmp;
10139 }
10140 return SDValue();
10141}
10142
10143std::pair<SDValue, SDValue>
10145 SelectionDAG &DAG) const {
10146 SDLoc SL(LD);
10147 SDValue Chain = LD->getChain();
10148 SDValue BasePTR = LD->getBasePtr();
10149 EVT SrcVT = LD->getMemoryVT();
10150 EVT DstVT = LD->getValueType(0);
10151 ISD::LoadExtType ExtType = LD->getExtensionType();
10152
10153 if (SrcVT.isScalableVector())
10154 report_fatal_error("Cannot scalarize scalable vector loads");
10155
10156 unsigned NumElem = SrcVT.getVectorNumElements();
10157
10158 EVT SrcEltVT = SrcVT.getScalarType();
10159 EVT DstEltVT = DstVT.getScalarType();
10160
10161 // A vector must always be stored in memory as-is, i.e. without any padding
10162 // between the elements, since various code depend on it, e.g. in the
10163 // handling of a bitcast of a vector type to int, which may be done with a
10164 // vector store followed by an integer load. A vector that does not have
10165 // elements that are byte-sized must therefore be stored as an integer
10166 // built out of the extracted vector elements.
10167 if (!SrcEltVT.isByteSized()) {
10168 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10169 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10170
10171 unsigned NumSrcBits = SrcVT.getSizeInBits();
10172 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10173
10174 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10175 SDValue SrcEltBitMask = DAG.getConstant(
10176 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10177
10178 // Load the whole vector and avoid masking off the top bits as it makes
10179 // the codegen worse.
10180 SDValue Load =
10181 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10182 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10183 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10184
10186 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10187 unsigned ShiftIntoIdx =
10188 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10189 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10190 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10191 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10192 SDValue Elt =
10193 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10194 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10195
10196 if (ExtType != ISD::NON_EXTLOAD) {
10197 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10198 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10199 }
10200
10201 Vals.push_back(Scalar);
10202 }
10203
10204 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10205 return std::make_pair(Value, Load.getValue(1));
10206 }
10207
10208 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10209 assert(SrcEltVT.isByteSized());
10210
10212 SmallVector<SDValue, 8> LoadChains;
10213
10214 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10215 SDValue ScalarLoad = DAG.getExtLoad(
10216 ExtType, SL, DstEltVT, Chain, BasePTR,
10217 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10218 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10219
10220 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10221
10222 Vals.push_back(ScalarLoad.getValue(0));
10223 LoadChains.push_back(ScalarLoad.getValue(1));
10224 }
10225
10226 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10227 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10228
10229 return std::make_pair(Value, NewChain);
10230}
10231
10233 SelectionDAG &DAG) const {
10234 SDLoc SL(ST);
10235
10236 SDValue Chain = ST->getChain();
10237 SDValue BasePtr = ST->getBasePtr();
10238 SDValue Value = ST->getValue();
10239 EVT StVT = ST->getMemoryVT();
10240
10241 if (StVT.isScalableVector())
10242 report_fatal_error("Cannot scalarize scalable vector stores");
10243
10244 // The type of the data we want to save
10245 EVT RegVT = Value.getValueType();
10246 EVT RegSclVT = RegVT.getScalarType();
10247
10248 // The type of data as saved in memory.
10249 EVT MemSclVT = StVT.getScalarType();
10250
10251 unsigned NumElem = StVT.getVectorNumElements();
10252
10253 // A vector must always be stored in memory as-is, i.e. without any padding
10254 // between the elements, since various code depend on it, e.g. in the
10255 // handling of a bitcast of a vector type to int, which may be done with a
10256 // vector store followed by an integer load. A vector that does not have
10257 // elements that are byte-sized must therefore be stored as an integer
10258 // built out of the extracted vector elements.
10259 if (!MemSclVT.isByteSized()) {
10260 unsigned NumBits = StVT.getSizeInBits();
10261 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10262
10263 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10264
10265 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10266 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10267 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10268 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10269 unsigned ShiftIntoIdx =
10270 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10271 SDValue ShiftAmount =
10272 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10273 SDValue ShiftedElt =
10274 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10275 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10276 }
10277
10278 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10279 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10280 ST->getAAInfo());
10281 }
10282
10283 // Store Stride in bytes
10284 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10285 assert(Stride && "Zero stride!");
10286 // Extract each of the elements from the original vector and save them into
10287 // memory individually.
10289 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10290 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10291
10292 SDValue Ptr =
10293 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10294
10295 // This scalar TruncStore may be illegal, but we legalize it later.
10296 SDValue Store = DAG.getTruncStore(
10297 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10298 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10299 ST->getAAInfo());
10300
10301 Stores.push_back(Store);
10302 }
10303
10304 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10305}
10306
10307std::pair<SDValue, SDValue>
10309 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10310 "unaligned indexed loads not implemented!");
10311 SDValue Chain = LD->getChain();
10312 SDValue Ptr = LD->getBasePtr();
10313 EVT VT = LD->getValueType(0);
10314 EVT LoadedVT = LD->getMemoryVT();
10315 SDLoc dl(LD);
10316 auto &MF = DAG.getMachineFunction();
10317
10318 if (VT.isFloatingPoint() || VT.isVector()) {
10319 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10320 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10321 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10322 LoadedVT.isVector()) {
10323 // Scalarize the load and let the individual components be handled.
10324 return scalarizeVectorLoad(LD, DAG);
10325 }
10326
10327 // Expand to a (misaligned) integer load of the same size,
10328 // then bitconvert to floating point or vector.
10329 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10330 LD->getMemOperand());
10331 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10332 if (LoadedVT != VT)
10333 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10334 ISD::ANY_EXTEND, dl, VT, Result);
10335
10336 return std::make_pair(Result, newLoad.getValue(1));
10337 }
10338
10339 // Copy the value to a (aligned) stack slot using (unaligned) integer
10340 // loads and stores, then do a (aligned) load from the stack slot.
10341 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10342 unsigned LoadedBytes = LoadedVT.getStoreSize();
10343 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10344 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10345
10346 // Make sure the stack slot is also aligned for the register type.
10347 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10348 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10350 SDValue StackPtr = StackBase;
10351 unsigned Offset = 0;
10352
10353 EVT PtrVT = Ptr.getValueType();
10354 EVT StackPtrVT = StackPtr.getValueType();
10355
10356 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10357 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10358
10359 // Do all but one copies using the full register width.
10360 for (unsigned i = 1; i < NumRegs; i++) {
10361 // Load one integer register's worth from the original location.
10362 SDValue Load = DAG.getLoad(
10363 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10364 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10365 // Follow the load with a store to the stack slot. Remember the store.
10366 Stores.push_back(DAG.getStore(
10367 Load.getValue(1), dl, Load, StackPtr,
10368 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10369 // Increment the pointers.
10370 Offset += RegBytes;
10371
10372 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10373 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10374 }
10375
10376 // The last copy may be partial. Do an extending load.
10377 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10378 8 * (LoadedBytes - Offset));
10379 SDValue Load = DAG.getExtLoad(
10380 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10381 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10382 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10383 // Follow the load with a store to the stack slot. Remember the store.
10384 // On big-endian machines this requires a truncating store to ensure
10385 // that the bits end up in the right place.
10386 Stores.push_back(DAG.getTruncStore(
10387 Load.getValue(1), dl, Load, StackPtr,
10388 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10389
10390 // The order of the stores doesn't matter - say it with a TokenFactor.
10391 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10392
10393 // Finally, perform the original load only redirected to the stack slot.
10394 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10395 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10396 LoadedVT);
10397
10398 // Callers expect a MERGE_VALUES node.
10399 return std::make_pair(Load, TF);
10400 }
10401
10402 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10403 "Unaligned load of unsupported type.");
10404
10405 // Compute the new VT that is half the size of the old one. This is an
10406 // integer MVT.
10407 unsigned NumBits = LoadedVT.getSizeInBits();
10408 EVT NewLoadedVT;
10409 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10410 NumBits >>= 1;
10411
10412 Align Alignment = LD->getBaseAlign();
10413 unsigned IncrementSize = NumBits / 8;
10414 ISD::LoadExtType HiExtType = LD->getExtensionType();
10415
10416 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10417 if (HiExtType == ISD::NON_EXTLOAD)
10418 HiExtType = ISD::ZEXTLOAD;
10419
10420 // Load the value in two parts
10421 SDValue Lo, Hi;
10422 if (DAG.getDataLayout().isLittleEndian()) {
10423 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10424 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10425 LD->getAAInfo());
10426
10427 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10428 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10429 LD->getPointerInfo().getWithOffset(IncrementSize),
10430 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10431 LD->getAAInfo());
10432 } else {
10433 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10434 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10435 LD->getAAInfo());
10436
10437 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10438 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10439 LD->getPointerInfo().getWithOffset(IncrementSize),
10440 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10441 LD->getAAInfo());
10442 }
10443
10444 // aggregate the two parts
10445 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10446 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10447 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10448
10449 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10450 Hi.getValue(1));
10451
10452 return std::make_pair(Result, TF);
10453}
10454
                                             SelectionDAG &DAG) const {
  // Expand a store whose alignment is below what the target supports into a
  // sequence the target can perform: an equivalent integer store, a
  // scalarized vector store, an aligned stack-slot round trip, or a pair of
  // half-width stores.
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getBaseAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a final, partial register is still copied below.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getBaseAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, Ptr,
        ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
        ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; which half goes to the lower address depends on
  // the data layout's endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
10586
SDValue
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  // Advance Addr past one vector's worth of memory. For compressed memory
  // only the active lanes occupy storage, so the increment is
  // popcount(Mask) * element-size; otherwise it is the full store size of
  // DataVT (scaled by vscale for scalable vectors).
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
          "Cannot currently handle compressed memory with scalable vectors");
    // Incrementing the pointer according to number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      // Widen sub-i32 masks before counting.
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    // Scalable vectors advance by vscale * known-minimum store size.
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinValue()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);

  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
10625
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  // Clamp Idx so that a subvector of SubEC elements starting at Idx lies
  // fully inside a vector of type VecVT; returns the (possibly clamped)
  // index value.
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    // USUBSAT avoids wrapping below zero when the subvector may be larger
    // than the minimum vector length.
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  // Single-element access into a power-of-two-sized vector: clamping is just
  // masking off the high index bits.
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
10659
                                              SDValue VecPtr, EVT VecVT,
                                              SDValue Index) const {
  // An element address is the degenerate one-element-subvector case; defer
  // to the subvector address computation.
      DAG, VecPtr, VecVT,
      Index);
}
10668
                                             SDValue VecPtr, EVT VecVT,
                                             EVT SubVecVT,
                                             SDValue Index) const {
  // Compute the address of the SubVecVT-typed subvector starting at element
  // Index within the in-memory vector at VecPtr:
  //   VecPtr + clamp(Index) [* vscale if scalable] * element-size.
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  // Keep the resulting access in bounds of the containing vector.
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // A scalable subvector spans vscale * Index base elements.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
10698
10699//===----------------------------------------------------------------------===//
10700// Implementation of Emulated TLS Model
10701//===----------------------------------------------------------------------===//
10702
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  ArgListTy Args;
  const GlobalValue *GV =
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // The "__emutls_v.<name>" control variable is expected to already exist in
  // the module (created by the frontend / emulated-TLS pass).
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At least for X86 targets, maybe good for other targets too?
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  // Emulated TLS resolves the whole variable address through the control
  // variable, so a non-zero offset cannot be represented here.
  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10739
10741 SelectionDAG &DAG) const {
10742 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10743 if (!isCtlzFast())
10744 return SDValue();
10745 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10746 SDLoc dl(Op);
10747 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10748 EVT VT = Op.getOperand(0).getValueType();
10749 SDValue Zext = Op.getOperand(0);
10750 if (VT.bitsLT(MVT::i32)) {
10751 VT = MVT::i32;
10752 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10753 }
10754 unsigned Log2b = Log2_32(VT.getSizeInBits());
10755 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10756 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10757 DAG.getConstant(Log2b, dl, MVT::i32));
10758 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10759 }
10760 return SDValue();
10761}
10762
  // Expand an integer [US]{MIN,MAX} node into operations legal for this
  // target, preferring cheap arithmetic identities over the generic
  // setcc+select expansion.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
    // Op0 is used twice; freeze it so both uses see the same value.
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    // Prefer a condition code whose SETCC already exists in the DAG ...
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    // ... including the commuted predicates, which swap the select arms.
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    // No reusable SETCC; fall back to the preferred predicate.
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
10843
  // Expand [SU]{ADD,SUB}SAT into legal operations: prefer the umax/umin
  // identities for the unsigned forms, otherwise compute the result with the
  // matching overflow-reporting node and substitute the saturation value on
  // overflow.
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
      // Unsigned add saturates towards all-ones:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
      // Unsigned sub saturates towards zero:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10959
  // Expand [US]CMP (three-way compare): produce -1 if LHS < RHS, 0 if equal,
  // and +1 if LHS > RHS, in the node's result type.
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  EVT ResVT = Node->getValueType(0);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDLoc dl(Node);

  // UCMP compares unsigned, SCMP signed.
  auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
  auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
  SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
  SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);

  // We can't perform arithmetic on i1 values. Extending them would
  // probably result in worse codegen, so let's just use two selects instead.
  // Some targets are also just better off using selects rather than subtraction
  // because one of the conditions can be merged with one of the selects.
  // And finally, if we don't know the contents of high bits of a boolean value
  // we can't perform any arithmetic either.
  if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
    SDValue SelectZeroOrOne =
        DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
                      DAG.getConstant(0, dl, ResVT));
    return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
                         SelectZeroOrOne);
  }

    std::swap(IsGT, IsLT);
  // Arithmetic path: the difference of the two comparison results encodes
  // -1 / 0 / +1 directly.
  return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
                            ResVT);
}
10994
  // Expand [US]SHLSAT: a left shift that saturates to the signed/unsigned
  // min/max instead of silently dropping shifted-out bits.
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
         "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

    return DAG.UnrollVectorOp(Node);

  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.

  unsigned BW = VT.getScalarSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
  // Shift back with the matching right shift; a round trip that changes the
  // value means significant bits were shifted out.
  SDValue Orig =
      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);

  SDValue SatVal;
  if (IsSigned) {
    // Signed saturation: SINT_MIN for negative LHS, SINT_MAX otherwise.
    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
    SDValue Cond =
        DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
    SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
  } else {
    // Unsigned saturation is always UINT_MAX.
    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
  }
  SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
  return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}
11033
11035 bool Signed, SDValue &Lo, SDValue &Hi,
11036 SDValue LHS, SDValue RHS,
11037 SDValue HiLHS, SDValue HiRHS) const {
11038 EVT VT = LHS.getValueType();
11039 assert(RHS.getValueType() == VT && "Mismatching operand types");
11040
11041 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11042 assert((!Signed || !HiLHS) &&
11043 "Signed flag should only be set when HiLHS and RiRHS are null");
11044
11045 // We'll expand the multiplication by brute force because we have no other
11046 // options. This is a trivially-generalized version of the code from
11047 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11048 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11049 // sign bits while calculating the Hi half.
11050 unsigned Bits = VT.getSizeInBits();
11051 unsigned HalfBits = Bits / 2;
11052 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11053 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11054 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11055
11056 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11057 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11058
11059 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11060 // This is always an unsigned shift.
11061 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11062
11063 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11064 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11065 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11066
11067 SDValue U =
11068 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11069 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11070 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11071
11072 SDValue V =
11073 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11074 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11075
11076 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11077 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11078
11079 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11080 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11081
11082 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11083 // the products to Hi.
11084 if (HiLHS) {
11085 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11086 DAG.getNode(ISD::ADD, dl, VT,
11087 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11088 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11089 }
11090}
11091
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  // Compute the full double-width product of LHS and RHS as {Lo, Hi},
  // preferring a MUL libcall on the 2x-wide integer type and falling back to
  // the inline brute-force expansion when no suitable libcall exists.
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
    // No usable libcall: expand the multiplication inline.
    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
    return;
  }

  SDValue HiLHS, HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
    HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
    HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
  } else {
    // Unsigned operands zero-extend into the high halves.
    HiLHS = DAG.getConstant(0, dl, VT);
    HiRHS = DAG.getConstant(0, dl, VT);
  }

  // Attempt a libcall.
  SDValue Ret;
  CallOptions.setIsSigned(Signed);
  CallOptions.setIsPostTypeLegalization(true);
    // Halves of WideVT are packed into registers in different order
    // depending on platform endianness. This is usually handled by
    // the C calling convention, but we can't defer to it in
    // the legalizer.
    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  } else {
    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
  }
  assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding result.");
  if (DAG.getDataLayout().isLittleEndian()) {
    // Same as above.
    Lo = Ret.getOperand(0);
    Hi = Ret.getOperand(1);
  } else {
    Lo = Ret.getOperand(1);
    Hi = Ret.getOperand(0);
  }
}
11156
SDValue
  // Expand [US]MULFIX[SAT]: fixed-point multiplication of two operands with
  // 'Scale' fractional bits, optionally saturating on overflow.
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Scale-0 saturating signed multiply: use SMULO's overflow flag to
      // choose between the product and a sign-dependent saturation value.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Scale-0 saturating unsigned multiply: saturate to UINT_MAX on
      // overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
  if (VT.isVector())
    WideVT =
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
    SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
    SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
    Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    SDValue Shifted =
        DAG.getNode(ISD::SRA, dl, WideVT, Res,
                    DAG.getShiftAmountConstant(VTSize, WideVT, dl));
    Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
  } else if (VT.isVector()) {
    // No legal way to obtain both halves of a vector multiply; let the
    // caller handle (e.g. unroll) the node.
    return SDValue();
  } else {
    // Last resort: brute-force expansion / libcall.
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getShiftAmountConstant(Scale, VT, dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // Compare Hi against the sign-extension of Lo's sign bit; a mismatch
    // means the wide product did not fit.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
11313
// Expand a fixed-point division ([SU]DIVFIX[SAT]) into an ordinary integer
// division by pre-shifting the operands: LHS is shifted up into its headroom
// and RHS shifted down through its trailing zeros so the quotient carries the
// requested Scale. Returns an empty SDValue when the operands lack enough
// headroom to do this without widening.
// NOTE(review): this is a doxygen listing dump — the numeric prefixes are
// rendered line numbers, and the signature line (original 11315, the
// "TargetLowering::expandFixedPointDiv(..." head) was dropped with its
// hyperlink. Verify against the real TargetLowering.cpp before compiling.
11314SDValue
11316 SDValue LHS, SDValue RHS,
11317 unsigned Scale, SelectionDAG &DAG) const {
11318 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11319 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11320 "Expected a fixed point division opcode");
11321
11322 EVT VT = LHS.getValueType();
11323 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11324 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11325 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11326
11327 // If there is enough room in the type to upscale the LHS or downscale the
11328 // RHS before the division, we can perform it in this type without having to
11329 // resize. For signed operations, the LHS headroom is the number of
11330 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11331 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned arm of this ?: (original line 11333, a
// computeKnownBits countMinLeadingZeros call in upstream) was dropped by the
// extraction — the expression below is incomplete as rendered.
11332 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11334 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11335
11336 // For signed saturating operations, we need to be able to detect true integer
11337 // division overflow; that is, when you have MIN / -EPS. However, this
11338 // is undefined behavior and if we emit divisions that could take such
11339 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11340 // example).
11341 // Avoid this by requiring an extra bit so that we never get this case.
11342 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11343 // signed saturating division, we need to emit a whopping 32-bit division.
11344 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11345 return SDValue();
11346
// Prefer shifting the LHS up (exact) before consuming RHS trailing zeros.
11347 unsigned LHSShift = std::min(LHSLead, Scale);
11348 unsigned RHSShift = Scale - LHSShift;
11349
11350 // At this point, we know that if we shift the LHS up by LHSShift and the
11351 // RHS down by RHSShift, we can emit a regular division with a final scaling
11352 // factor of Scale.
11353
11354 if (LHSShift)
11355 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11356 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11357 if (RHSShift)
11358 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11359 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11360
11361 SDValue Quot;
11362 if (Signed) {
11363 // For signed operations, if the resulting quotient is negative and the
11364 // remainder is nonzero, subtract 1 from the quotient to round towards
11365 // negative infinity.
11366 SDValue Rem;
11367 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11368 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11369 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the second half of this condition (original line 11371, an
// isOperationLegalOrCustom(ISD::SDIVREM, VT) check upstream) was dropped by
// the extraction.
11370 if (isTypeLegal(VT) &&
11372 Quot = DAG.getNode(ISD::SDIVREM, dl,
11373 DAG.getVTList(VT, VT),
11374 LHS, RHS);
11375 Rem = Quot.getValue(1);
11376 Quot = Quot.getValue(0);
11377 } else {
11378 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11379 LHS, RHS);
11380 Rem = DAG.getNode(ISD::SREM, dl, VT,
11381 LHS, RHS);
11382 }
// Quotient sign is the XOR of operand signs; round toward -inf by
// subtracting 1 when the truncated quotient is negative and inexact.
11383 SDValue Zero = DAG.getConstant(0, dl, VT);
11384 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11385 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11386 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11387 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11388 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11389 DAG.getConstant(1, dl, VT));
11390 Quot = DAG.getSelect(dl, VT,
11391 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11392 Sub1, Quot);
11393 } else
11394 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11395 LHS, RHS);
11396
11397 return Quot;
11398}
11399
// Expand UADDO/USUBO: produce Result = LHS +/- RHS and an Overflow flag.
// Prefers a legal UADDO_CARRY/USUBO_CARRY node (with a zero carry-in);
// otherwise emits the plain ADD/SUB and derives overflow via a compare.
// NOTE(review): doxygen listing dump — the signature head (original line
// 11400, "void TargetLowering::expandUADDSUBO(") was dropped with its
// hyperlink; verify against upstream before compiling.
11401 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11402 SDLoc dl(Node);
11403 SDValue LHS = Node->getOperand(0);
11404 SDValue RHS = Node->getOperand(1);
11405 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11406
11407 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11408 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11409 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11410 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11411 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11412 { LHS, RHS, CarryIn });
11413 Result = SDValue(NodeCarry.getNode(), 0);
11414 Overflow = SDValue(NodeCarry.getNode(), 1);
11415 return;
11416 }
11417
11418 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11419 LHS.getValueType(), LHS, RHS);
11420
11421 EVT ResultType = Node->getValueType(1);
11422 EVT SetCCType = getSetCCResultType(
11423 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11424 SDValue SetCC;
11425 if (IsAdd && isOneConstant(RHS)) {
11426 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11427 // the live range of X. We assume comparing with 0 is cheap.
11428 // The general case (X + C) < C is not necessarily beneficial. Although we
11429 // reduce the live range of X, we may introduce the materialization of
11430 // constant C.
11431 SetCC =
11432 DAG.getSetCC(dl, SetCCType, Result,
11433 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11434 } else if (IsAdd && isAllOnesConstant(RHS)) {
11435 // Special case: uaddo X, -1 overflows if X != 0.
11436 SetCC =
11437 DAG.getSetCC(dl, SetCCType, LHS,
11438 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11439 } else {
// General case: unsigned add overflowed iff Result < LHS; unsigned sub
// overflowed (borrowed) iff Result > LHS.
11440 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11441 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11442 }
11443 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11444}
11445
// Expand SADDO/SSUBO: produce Result = LHS +/- RHS and a signed-overflow
// flag. If the matching saturating op (SADDSAT/SSUBSAT) is legal, overflow is
// simply "wrapping result != saturated result"; otherwise it is derived from
// the sign relationship between RHS and (Result < LHS).
// NOTE(review): doxygen listing dump — the signature head (original line
// 11446, "void TargetLowering::expandSADDSUBO(") was dropped with its
// hyperlink; verify against upstream before compiling.
11447 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11448 SDLoc dl(Node);
11449 SDValue LHS = Node->getOperand(0);
11450 SDValue RHS = Node->getOperand(1);
11451 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11452
11453 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11454 LHS.getValueType(), LHS, RHS);
11455
11456 EVT ResultType = Node->getValueType(1);
11457 EVT OType = getSetCCResultType(
11458 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11459
11460 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11461 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11462 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11463 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11464 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11465 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11466 return;
11467 }
11468
11469 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11470
11471 // For an addition, the result should be less than one of the operands (LHS)
11472 // if and only if the other operand (RHS) is negative, otherwise there will
11473 // be overflow.
11474 // For a subtraction, the result should be less than one of the operands
11475 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11476 // otherwise there will be overflow.
11477 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11478 SDValue ConditionRHS =
11479 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11480
// Overflow iff the expected relation and the observed relation disagree.
11481 Overflow = DAG.getBoolExtOrTrunc(
11482 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11483 ResultType, ResultType);
11484}
11485
// Expand SMULO/UMULO: compute the low half of the product in Result and set
// Overflow when the product does not fit in VT. Strategies, in order:
// power-of-two constant RHS via shifts, MULH[SU], [SU]MUL_LOHI, widened MUL
// in a double-width type, and finally forceExpandWideMUL (scalar only).
// Returns false when no expansion is possible (illegal wide vector case).
// NOTE(review): doxygen listing dump — the signature head (original line
// 11486, "bool TargetLowering::expandMULO(") and several hyperlinked lines
// (11514-11515 vector-WideVT construction, 11520-11521 the Ops initializer
// listing MULH/MUL_LOHI/extend opcodes) were dropped by extraction; verify
// against upstream before compiling.
11487 SDValue &Overflow, SelectionDAG &DAG) const {
11488 SDLoc dl(Node);
11489 EVT VT = Node->getValueType(0);
11490 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11491 SDValue LHS = Node->getOperand(0);
11492 SDValue RHS = Node->getOperand(1);
11493 bool isSigned = Node->getOpcode() == ISD::SMULO;
11494
11495 // For power-of-two multiplications we can use a simpler shift expansion.
11496 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11497 const APInt &C = RHSC->getAPIntValue();
11498 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11499 if (C.isPowerOf2()) {
11500 // smulo(x, signed_min) is same as umulo(x, signed_min).
11501 bool UseArithShift = isSigned && !C.isMinSignedValue();
11502 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11503 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11504 Overflow = DAG.getSetCC(dl, SetCCVT,
11505 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11506 dl, VT, Result, ShiftAmt),
11507 LHS, ISD::SETNE);
11508 return true;
11509 }
11510 }
11511
11512 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11513 if (VT.isVector())
11514 WideVT =
11516
11517 SDValue BottomHalf;
11518 SDValue TopHalf;
// Ops[isSigned] selects {high-half mul, mul_lohi, widening extend} opcodes;
// the initializer itself was lost to extraction (see NOTE above).
11519 static const unsigned Ops[2][3] =
11522 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11523 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11524 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11525 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11526 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11527 RHS);
11528 TopHalf = BottomHalf.getValue(1);
11529 } else if (isTypeLegal(WideVT)) {
11530 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11531 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11532 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11533 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11534 SDValue ShiftAmt =
11535 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11536 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11537 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11538 } else {
11539 if (VT.isVector())
11540 return false;
11541
11542 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11543 }
11544
11545 Result = BottomHalf;
11546 if (isSigned) {
// Signed: no overflow iff the top half is the sign-extension of the bottom
// half, i.e. TopHalf == BottomHalf >> (bits-1).
11547 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11548 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11549 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11550 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11551 } else {
// Unsigned: overflow iff any bit of the top half is set.
11552 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11553 DAG.getConstant(0, dl, VT), ISD::SETNE);
11554 }
11555
11556 // Truncate the result if SetCC returns a larger type than needed.
11557 EVT RType = Node->getValueType(1);
11558 if (RType.bitsLT(Overflow.getValueType()))
11559 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11560
11561 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11562 "Unexpected result type for S/UMULO legalization");
11563 return true;
11564}
11565
// Expand a VECREDUCE_* node. For power-of-two vectors it first tries repeated
// split-and-combine (halving the vector with the base binary opcode), bailing
// out early if a half-width reduction becomes legal; otherwise it fully
// scalarizes: extract all elements and fold them left-to-right with the base
// opcode, any-extending if the result type is wider than the element type.
// NOTE(review): doxygen listing dump — the signature head (original 11566),
// the loop header at 11574 (a while-loop over the shrinking vector in
// upstream), the report_fatal_error call head at 11592, and the SmallVector
// declaration at 11598 were dropped with their hyperlinks; verify against
// upstream before compiling.
11567 SDLoc dl(Node);
11568 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11569 SDValue Op = Node->getOperand(0);
11570 EVT VT = Op.getValueType();
11571
11572 // Try to use a shuffle reduction for power of two vectors.
11573 if (VT.isPow2VectorType()) {
11575 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11576 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11577 break;
11578
11579 SDValue Lo, Hi;
11580 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11581 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11582 VT = HalfVT;
11583
11584 // Stop if splitting is enough to make the reduction legal.
11585 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11586 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11587 Node->getFlags());
11588 }
11589 }
11590
11591 if (VT.isScalableVector())
11593 "Expanding reductions for scalable vectors is undefined.");
11594
11595 EVT EltVT = VT.getVectorElementType();
11596 unsigned NumElts = VT.getVectorNumElements();
11597
11599 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11600
// Left-to-right scalar fold over the extracted elements.
11601 SDValue Res = Ops[0];
11602 for (unsigned i = 1; i < NumElts; i++)
11603 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11604
11605 // Result type may be wider than element type.
11606 if (EltVT != Node->getValueType(0))
11607 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11608 return Res;
11609}
11610
// Expand a sequential (ordered) vector reduction: starting from the scalar
// accumulator operand, fold each vector element in order with the base binary
// opcode. Order must be preserved, so no tree/shuffle strategy is attempted.
// NOTE(review): doxygen listing dump — the signature head (original 11611),
// the report_fatal_error call head at 11621, and the SmallVector declaration
// at 11626 were dropped with their hyperlinks; verify against upstream.
11612 SDLoc dl(Node);
11613 SDValue AccOp = Node->getOperand(0);
11614 SDValue VecOp = Node->getOperand(1);
11615 SDNodeFlags Flags = Node->getFlags();
11616
11617 EVT VT = VecOp.getValueType();
11618 EVT EltVT = VT.getVectorElementType();
11619
11620 if (VT.isScalableVector())
11622 "Expanding reductions for scalable vectors is undefined.");
11623
11624 unsigned NumElts = VT.getVectorNumElements();
11625
11627 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11628
11629 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11630
// Strictly in-order fold: Res = (((Acc op e0) op e1) op ...).
11631 SDValue Res = AccOp;
11632 for (unsigned i = 0; i < NumElts; i++)
11633 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11634
11635 return Res;
11636}
11637
// Expand SREM/UREM. Prefers a legal-or-custom [SU]DIVREM (taking the second
// result), then falls back to X - (X/Y)*Y when only the division is
// available. Returns false if neither form can be emitted.
// NOTE(review): doxygen listing dump — the signature head (original 11638,
// "bool TargetLowering::expandREM(SDNode *Node, SDValue &Result," in
// upstream) was dropped with its hyperlink; verify against upstream.
11639 SelectionDAG &DAG) const {
11640 EVT VT = Node->getValueType(0);
11641 SDLoc dl(Node);
11642 bool isSigned = Node->getOpcode() == ISD::SREM;
11643 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11644 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11645 SDValue Dividend = Node->getOperand(0);
11646 SDValue Divisor = Node->getOperand(1);
11647 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
// Remainder is result #1 of the combined divrem node.
11648 SDVTList VTs = DAG.getVTList(VT, VT);
11649 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11650 return true;
11651 }
11652 if (isOperationLegalOrCustom(DivOpc, VT)) {
11653 // X % Y -> X-X/Y*Y
11654 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11655 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11656 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11657 return true;
11658 }
11659 return false;
11660}
11661
// Expand FP_TO_[SU]INT_SAT: saturating float-to-int conversion. Computes the
// integer bounds for the saturation width, and either (a) clamps with
// FMINNUM/FMAXNUM before a plain FP_TO_XINT when the bounds are exactly
// representable and min/max are legal, or (b) converts unguarded and patches
// out-of-range/NaN lanes with compare+select. NaN always maps to zero for
// the signed case and to MinInt (== 0) for unsigned.
// NOTE(review): doxygen listing dump — the signature head (original 11662,
// "SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node," in upstream)
// was dropped with its hyperlink; verify against upstream.
11663 SelectionDAG &DAG) const {
11664 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11665 SDLoc dl(SDValue(Node, 0));
11666 SDValue Src = Node->getOperand(0);
11667
11668 // DstVT is the result type, while SatVT is the size to which we saturate
11669 EVT SrcVT = Src.getValueType();
11670 EVT DstVT = Node->getValueType(0);
11671
11672 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11673 unsigned SatWidth = SatVT.getScalarSizeInBits();
11674 unsigned DstWidth = DstVT.getScalarSizeInBits();
11675 assert(SatWidth <= DstWidth &&
11676 "Expected saturation width smaller than result width");
11677
11678 // Determine minimum and maximum integer values and their corresponding
11679 // floating-point values.
11680 APInt MinInt, MaxInt;
11681 if (IsSigned) {
11682 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11683 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11684 } else {
11685 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11686 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11687 }
11688
11689 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11690 // libcall emission cannot handle this. Large result types will fail.
11691 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11692 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11693 SrcVT = Src.getValueType();
11694 }
11695
// Round the integer bounds toward zero into the source FP format; if either
// conversion is inexact the clamp-then-convert fast path is unsound.
11696 const fltSemantics &Sem = SrcVT.getFltSemantics();
11697 APFloat MinFloat(Sem);
11698 APFloat MaxFloat(Sem);
11699
11700 APFloat::opStatus MinStatus =
11701 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11702 APFloat::opStatus MaxStatus =
11703 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11704 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11705 !(MaxStatus & APFloat::opStatus::opInexact);
11706
11707 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11708 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11709
11710 // If the integer bounds are exactly representable as floats and min/max are
11711 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11712 // of comparisons and selects.
11713 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11714 isOperationLegal(ISD::FMAXNUM, SrcVT);
11715 if (AreExactFloatBounds && MinMaxLegal) {
11716 SDValue Clamped = Src;
11717
11718 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11719 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11720 // Clamp by MaxFloat from above. NaN cannot occur.
11721 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11722 // Convert clamped value to integer.
11723 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11724 dl, DstVT, Clamped);
11725
11726 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11727 // which will cast to zero.
11728 if (!IsSigned)
11729 return FpToInt;
11730
11731 // Otherwise, select 0 if Src is NaN.
11732 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11733 EVT SetCCVT =
11734 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11735 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11736 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11737 }
11738
11739 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11740 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11741
11742 // Result of direct conversion. The assumption here is that the operation is
11743 // non-trapping and it's fine to apply it to an out-of-range value if we
11744 // select it away later.
11745 SDValue FpToInt =
11746 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11747
11748 SDValue Select = FpToInt;
11749
11750 EVT SetCCVT =
11751 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11752
11753 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11754 // MinInt if Src is NaN.
11755 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11756 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11757 // If Src OGT MaxFloat, select MaxInt.
11758 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11759 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11760
11761 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11762 // is already zero.
11763 if (!IsSigned)
11764 return Select;
11765
11766 // Otherwise, select 0 if Src is NaN.
11767 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11768 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11769 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11770}
11771
// Round Op from OperandVT down to ResultVT using round-to-odd: when the
// narrowing was inexact and landed on an even significand, nudge the narrow
// bit pattern by +/-1 toward the value that makes the low bit odd. This makes
// a subsequent second rounding (e.g. f32 -> bf16) produce the correctly
// rounded result (Boldo & Melquiond, "When double rounding is odd").
// NOTE(review): doxygen listing dump — the signature head (original 11772,
// "SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,"
// in upstream) was dropped with its hyperlink; verify against upstream.
11773 const SDLoc &dl,
11774 SelectionDAG &DAG) const {
11775 EVT OperandVT = Op.getValueType();
11776 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11777 return Op;
11778 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11779 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11780 // can induce double-rounding which may alter the results. We can
11781 // correct for this using a trick explained in: Boldo, Sylvie, and
11782 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11783 // World Congress. 2005.
11784 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11785 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11786
11787 // We can keep the narrow value as-is if narrowing was exact (no
11788 // rounding error), the wide value was NaN (the narrow value is also
11789 // NaN and should be preserved) or if we rounded to the odd value.
11790 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11791 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11792 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
// Low significand bit of the narrow value tells us whether it is odd.
11793 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11794 EVT ResultIntVTCCVT = getSetCCResultType(
11795 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11796 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11797 // The result is already odd so we don't need to do anything.
11798 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11799
11800 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11801 Op.getValueType());
11802 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal-or-unordered, so this covers both the exact case
// and the NaN case in one compare.
11803 SDValue KeepNarrow =
11804 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11805 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11806 // We morally performed a round-down if AbsNarrow is smaller than
11807 // AbsWide.
11808 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11809 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11810 SDValue NarrowIsRd =
11811 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11812 // If the narrow value is odd or exact, pick it.
11813 // Otherwise, narrow is even and corresponds to either the rounded-up
11814 // or rounded-down value. If narrow is the rounded-down value, we want
11815 // the rounded-up value as it will be odd.
11816 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11817 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11818 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11819 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11820}
11821
// Expand FP_ROUND when the destination scalar type is bf16: round to f32 with
// round-to-odd (avoiding double rounding), then do integer round-to-nearest-
// even on the top 16 bits, quieting NaNs so they are not turned into
// infinities by the bias add. The truncating (operand-1 == 1) form maps to
// FP_TO_BF16. Non-bf16 destinations return an empty SDValue (no expansion).
// NOTE(review): doxygen listing dump — the signature head (original 11822,
// "SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG)"
// in upstream) was dropped with its hyperlink; verify against upstream.
11823 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11824 SDValue Op = Node->getOperand(0);
11825 EVT VT = Node->getValueType(0);
11826 SDLoc dl(Node);
11827 if (VT.getScalarType() == MVT::bf16) {
11828 if (Node->getConstantOperandVal(1) == 1) {
11829 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11830 }
11831 EVT OperandVT = Op.getValueType();
// Detect NaN on the ORIGINAL operand, before any intermediate rounding.
11832 SDValue IsNaN = DAG.getSetCC(
11833 dl,
11834 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11835 Op, Op, ISD::SETUO);
11836
11837 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11838 // can induce double-rounding which may alter the results. We can
11839 // correct for this using a trick explained in: Boldo, Sylvie, and
11840 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11841 // World Congress. 2005.
11842 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11843 EVT I32 = F32.changeTypeToInteger();
11844 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11845 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11846
11847 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11848 // turning into infinities.
11849 SDValue NaN =
11850 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11851
11852 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the LSB that survives the
// truncation, so ties round toward the even bf16 value.
11853 SDValue One = DAG.getConstant(1, dl, I32);
11854 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11855 DAG.getShiftAmountConstant(16, I32, dl));
11856 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11857 SDValue RoundingBias =
11858 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11859 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11860
11861 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11862 // 0x80000000.
11863 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11864
11865 // Now that we have rounded, shift the bits into position.
11866 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11867 DAG.getShiftAmountConstant(16, I32, dl));
11868 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11869 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11870 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11871 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11872 }
11873 return SDValue();
11874}
11875
// Expand VECTOR_SPLICE for scalable vectors by going through a stack slot:
// store V1 then V2 contiguously, then load VT back starting at the splice
// point. Non-negative Imm indexes forward from the start of V1; negative Imm
// selects the last -Imm elements of V1 followed by the head of V2, with the
// trailing-byte offset clamped so the load stays inside the V1:V2 buffer.
// NOTE(review): doxygen listing dump — the signature head (original 11876),
// the MemVT construction head at 11901, the vscale-offset APInt at 11914, and
// the MachinePointerInfo arguments at 11924/11938/11947 were dropped with
// their hyperlinks; verify against upstream before compiling.
11877 SelectionDAG &DAG) const {
11878 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11879 assert(Node->getValueType(0).isScalableVector() &&
11880 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11881
11882 EVT VT = Node->getValueType(0);
11883 SDValue V1 = Node->getOperand(0);
11884 SDValue V2 = Node->getOperand(1);
11885 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11886 SDLoc DL(Node);
11887
11888 // Expand through memory thusly:
11889 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11890 // Store V1, Ptr
11891 // Store V2, Ptr + sizeof(V1)
11892 // If (Imm < 0)
11893 // TrailingElts = -Imm
11894 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11895 // else
11896 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11897 // Res = Load Ptr
11898
11899 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11900
11902 VT.getVectorElementCount() * 2);
11903 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11904 EVT PtrVT = StackPtr.getValueType();
11905 auto &MF = DAG.getMachineFunction();
11906 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11907 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11908
11909 // Store the lo part of CONCAT_VECTORS(V1, V2)
11910 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11911 // Store the hi part of CONCAT_VECTORS(V1, V2)
// Offset is vscale-scaled since V1's store size is scalable.
11912 SDValue OffsetToV2 = DAG.getVScale(
11913 DL, PtrVT,
11915 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11916 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11917
11918 if (Imm >= 0) {
11919 // Load back the required element. getVectorElementPointer takes care of
11920 // clamping the index if it's out-of-bounds.
11921 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11922 // Load the spliced result
11923 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11925 }
11926
11927 uint64_t TrailingElts = -Imm;
11928
11929 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11930 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11931 SDValue TrailingBytes =
11932 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11933
11934 if (TrailingElts > VT.getVectorMinNumElements()) {
11935 SDValue VLBytes =
11936 DAG.getVScale(DL, PtrVT,
11937 APInt(PtrVT.getFixedSizeInBits(),
11939 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11940 }
11941
11942 // Calculate the start address of the spliced result.
11943 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11944
11945 // Load the spliced result
11946 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11948}
11949
// Expand VECTOR_COMPRESS through a stack slot: optionally pre-fill the slot
// with the passthru vector, then walk the source vector lane by lane, storing
// each element at a running output position that advances only when the
// corresponding mask lane is set, and finally re-load the whole vector.
// After the loop, the element just past the compressed run is patched with
// the correct passthru value (splat fast path, or passthru[popcount(mask)]).
// Fixed-width vectors only; scalable vectors are a fatal error here.
// NOTE(review): doxygen listing dump — the signature head (original 11950)
// and several hyperlinked argument lines (11970 fixed-stack PtrInfo, 12002
// the extend opcode for the popcount widening, 12009/12019/12044 unknown-
// stack MachinePointerInfo arguments) were dropped; verify against upstream.
11951 SelectionDAG &DAG) const {
11952 SDLoc DL(Node);
11953 SDValue Vec = Node->getOperand(0);
11954 SDValue Mask = Node->getOperand(1);
11955 SDValue Passthru = Node->getOperand(2);
11956
11957 EVT VecVT = Vec.getValueType();
11958 EVT ScalarVT = VecVT.getScalarType();
11959 EVT MaskVT = Mask.getValueType();
11960 EVT MaskScalarVT = MaskVT.getScalarType();
11961
11962 // Needs to be handled by targets that have scalable vector types.
11963 if (VecVT.isScalableVector())
11964 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11965
11966 SDValue StackPtr = DAG.CreateStackTemporary(
11967 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11968 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11969 MachinePointerInfo PtrInfo =
11971
11972 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11973 SDValue Chain = DAG.getEntryNode();
11974 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11975
11976 bool HasPassthru = !Passthru.isUndef();
11977
11978 // If we have a passthru vector, store it on the stack, overwrite the matching
11979 // positions and then re-write the last element that was potentially
11980 // overwritten even though mask[i] = false.
11981 if (HasPassthru)
11982 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11983
11984 SDValue LastWriteVal;
11985 APInt PassthruSplatVal;
11986 bool IsSplatPassthru =
11987 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11988
11989 if (IsSplatPassthru) {
11990 // As we do not know which position we wrote to last, we cannot simply
11991 // access that index from the passthru vector. So we first check if passthru
11992 // is a splat vector, to use any element ...
11993 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11994 } else if (HasPassthru) {
11995 // ... if it is not a splat vector, we need to get the passthru value at
11996 // position = popcount(mask) and re-load it from the stack before it is
11997 // overwritten in the loop below.
11998 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11999 SDValue Popcount = DAG.getNode(
12000 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
12001 Popcount =
12003 MaskVT.changeVectorElementType(PopcountVT), Popcount);
12004 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12005 SDValue LastElmtPtr =
12006 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12007 LastWriteVal = DAG.getLoad(
12008 ScalarVT, DL, Chain, LastElmtPtr,
12010 Chain = LastWriteVal.getValue(1);
12011 }
12012
12013 unsigned NumElms = VecVT.getVectorNumElements();
12014 for (unsigned I = 0; I < NumElms; I++) {
12015 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12016 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12017 Chain = DAG.getStore(
12018 Chain, DL, ValI, OutPtr,
12020
12021 // Get the mask value and add it to the current output position. This
12022 // either increments by 1 if MaskI is true or adds 0 otherwise.
12023 // Freeze in case we have poison/undef mask entries.
12024 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12025 MaskI = DAG.getFreeze(MaskI);
12026 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12027 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12028 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12029
12030 if (HasPassthru && I == NumElms - 1) {
// OutPos can equal NumElms when every lane was selected; clamp it so the
// final fix-up store stays in bounds.
12031 SDValue EndOfVector =
12032 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12033 SDValue AllLanesSelected =
12034 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12035 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12036 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12037
12038 // Re-write the last ValI if all lanes were selected. Otherwise,
12039 // overwrite the last write it with the passthru value.
12040 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12041 LastWriteVal, SDNodeFlags::Unpredictable);
12042 Chain = DAG.getStore(
12043 Chain, DL, LastWriteVal, OutPtr,
12045 }
12046 }
12047
12048 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12049}
12050
// Expand PARTIAL_REDUCE_[US]MLA: extend both multiplicands to the accumulator
// element type, multiply (skipped when RHS is a constant splat of 1), then
// slice the wide product into accumulator-width subvectors and sum them all
// together with Acc via a queue-based balanced ADD tree.
// NOTE(review): doxygen listing dump — the signature head (original 12051),
// the ExtMulOpVT element-type expression at 12061, and the ?: arms choosing
// the zero/sign-extend opcodes at 12065-12066/12068-12069 were dropped with
// their hyperlinks; verify against upstream before compiling.
12052 SelectionDAG &DAG) const {
12053 SDLoc DL(N);
12054 SDValue Acc = N->getOperand(0);
12055 SDValue MulLHS = N->getOperand(1);
12056 SDValue MulRHS = N->getOperand(2);
12057 EVT AccVT = Acc.getValueType();
12058 EVT MulOpVT = MulLHS.getValueType();
12059
12060 EVT ExtMulOpVT =
12062 MulOpVT.getVectorElementCount());
12063
12064 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12067 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12070
12071 if (ExtMulOpVT != MulOpVT) {
12072 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12073 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12074 }
// Elide the multiply when RHS is splat(1): the product is just MulLHS.
12075 SDValue Input = MulLHS;
12076 APInt ConstantOne;
12077 if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
12078 !ConstantOne.isOne())
12079 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12080
12081 unsigned Stride = AccVT.getVectorMinNumElements();
12082 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12083
12084 // Collect all of the subvectors
12085 std::deque<SDValue> Subvectors = {Acc};
12086 for (unsigned I = 0; I < ScaleFactor; I++)
12087 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12088
12089 // Flatten the subvector tree
// Pairwise reduction: pop two, push their sum, until one value remains.
12090 while (Subvectors.size() > 1) {
12091 Subvectors.push_back(
12092 DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12093 Subvectors.pop_front();
12094 Subvectors.pop_front();
12095 }
12096
12097 assert(Subvectors.size() == 1 &&
12098 "There should only be one subvector after tree flattening");
12099
12100 return Subvectors[0];
12101}
12102
12104 SDValue &LHS, SDValue &RHS,
12105 SDValue &CC, SDValue Mask,
12106 SDValue EVL, bool &NeedInvert,
12107 const SDLoc &dl, SDValue &Chain,
12108 bool IsSignaling) const {
12109 MVT OpVT = LHS.getSimpleValueType();
12110 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12111 NeedInvert = false;
12112 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12113 bool IsNonVP = !EVL;
12114 switch (getCondCodeAction(CCCode, OpVT)) {
12115 default:
12116 llvm_unreachable("Unknown condition code action!");
12118 // Nothing to do.
12119 break;
12122 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12123 std::swap(LHS, RHS);
12124 CC = DAG.getCondCode(InvCC);
12125 return true;
12126 }
12127 // Swapping operands didn't work. Try inverting the condition.
12128 bool NeedSwap = false;
12129 InvCC = getSetCCInverse(CCCode, OpVT);
12130 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12131 // If inverting the condition is not enough, try swapping operands
12132 // on top of it.
12133 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12134 NeedSwap = true;
12135 }
12136 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12137 CC = DAG.getCondCode(InvCC);
12138 NeedInvert = true;
12139 if (NeedSwap)
12140 std::swap(LHS, RHS);
12141 return true;
12142 }
12143
12144 // Special case: expand i1 comparisons using logical operations.
12145 if (OpVT == MVT::i1) {
12146 SDValue Ret;
12147 switch (CCCode) {
12148 default:
12149 llvm_unreachable("Unknown integer setcc!");
12150 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12151 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12152 MVT::i1);
12153 break;
12154 case ISD::SETNE: // X != Y --> (X ^ Y)
12155 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12156 break;
12157 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12158 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12159 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12160 DAG.getNOT(dl, LHS, MVT::i1));
12161 break;
12162 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12163 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12164 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12165 DAG.getNOT(dl, RHS, MVT::i1));
12166 break;
12167 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12168 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12169 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12170 DAG.getNOT(dl, LHS, MVT::i1));
12171 break;
12172 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12173 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12174 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12175 DAG.getNOT(dl, RHS, MVT::i1));
12176 break;
12177 }
12178
12179 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12180 RHS = SDValue();
12181 CC = SDValue();
12182 return true;
12183 }
12184
12186 unsigned Opc = 0;
12187 switch (CCCode) {
12188 default:
12189 llvm_unreachable("Don't know how to expand this condition!");
12190 case ISD::SETUO:
12191 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12192 CC1 = ISD::SETUNE;
12193 CC2 = ISD::SETUNE;
12194 Opc = ISD::OR;
12195 break;
12196 }
12198 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12199 NeedInvert = true;
12200 [[fallthrough]];
12201 case ISD::SETO:
12203 "If SETO is expanded, SETOEQ must be legal!");
12204 CC1 = ISD::SETOEQ;
12205 CC2 = ISD::SETOEQ;
12206 Opc = ISD::AND;
12207 break;
12208 case ISD::SETONE:
12209 case ISD::SETUEQ:
12210 // If the SETUO or SETO CC isn't legal, we might be able to use
12211 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12212 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12213 // the operands.
12214 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12215 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12216 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12217 CC1 = ISD::SETOGT;
12218 CC2 = ISD::SETOLT;
12219 Opc = ISD::OR;
12220 NeedInvert = ((unsigned)CCCode & 0x8U);
12221 break;
12222 }
12223 [[fallthrough]];
12224 case ISD::SETOEQ:
12225 case ISD::SETOGT:
12226 case ISD::SETOGE:
12227 case ISD::SETOLT:
12228 case ISD::SETOLE:
12229 case ISD::SETUNE:
12230 case ISD::SETUGT:
12231 case ISD::SETUGE:
12232 case ISD::SETULT:
12233 case ISD::SETULE:
12234 // If we are floating point, assign and break, otherwise fall through.
12235 if (!OpVT.isInteger()) {
12236 // We can use the 4th bit to tell if we are the unordered
12237 // or ordered version of the opcode.
12238 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12239 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12240 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12241 break;
12242 }
12243 // Fallthrough if we are unsigned integer.
12244 [[fallthrough]];
12245 case ISD::SETLE:
12246 case ISD::SETGT:
12247 case ISD::SETGE:
12248 case ISD::SETLT:
12249 case ISD::SETNE:
12250 case ISD::SETEQ:
12251 // If all combinations of inverting the condition and swapping operands
12252 // didn't work then we have no means to expand the condition.
12253 llvm_unreachable("Don't know how to expand this condition!");
12254 }
12255
12256 SDValue SetCC1, SetCC2;
12257 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12258 // If we aren't the ordered or unorder operation,
12259 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12260 if (IsNonVP) {
12261 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12262 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12263 } else {
12264 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12265 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12266 }
12267 } else {
12268 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12269 if (IsNonVP) {
12270 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12271 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12272 } else {
12273 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12274 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12275 }
12276 }
12277 if (Chain)
12278 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12279 SetCC2.getValue(1));
12280 if (IsNonVP)
12281 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12282 else {
12283 // Transform the binary opcode to the VP equivalent.
12284 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12285 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12286 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12287 }
12288 RHS = SDValue();
12289 CC = SDValue();
12290 return true;
12291 }
12292 }
12293 return false;
12294}
12295
// NOTE(review): extraction artifact — the signature's first line (return
// type, name and the 'Node' parameter) is missing from this chunk.
// Presumably TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
// ...) — TODO confirm against upstream. Code kept byte-identical.
//
// Expands an n-ary vector operation by splitting every operand into
// lo/hi halves, applying the same opcode to each half, and concatenating
// the results. Returns SDValue() (no expansion) when the type cannot be
// split evenly, the halves differ or are illegal, or the half-width
// operation is not supported.
12297 SelectionDAG &DAG) const {
12298 EVT VT = Node->getValueType(0);
12299 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12300 // split into two equal parts.
12301 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12302 return SDValue();
12303
12304 // Restrict expansion to cases where both parts can be concatenated.
12305 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12306 if (LoVT != HiVT || !isTypeLegal(LoVT))
12307 return SDValue();
12308
12309 SDLoc DL(Node);
12310 unsigned Opcode = Node->getOpcode();
12311
12312 // Don't expand if the result is likely to be unrolled anyway.
12313 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12314 return SDValue();
12315
// Split each operand into its lo/hi halves, preserving operand order.
12316 SmallVector<SDValue, 4> LoOps, HiOps;
12317 for (const SDValue &V : Node->op_values()) {
12318 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12319 LoOps.push_back(Lo);
12320 HiOps.push_back(Hi);
12321 }
12322
// Apply the original opcode to each half and stitch the halves back
// together into the full-width result.
12323 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12324 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12325 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12326}
12327
// NOTE(review): extraction artifact — the signature's first line (return
// type, name and the 'ResultVT' parameter used below) is missing, as are
// upstream lines 12350 (presumably 'MachinePointerInfo MPI;') and 12383
// (presumably '? ISD::ZEXTLOAD'). Likely
// TargetLowering::scalarizeExtractedVectorLoad — TODO confirm against
// upstream. Code kept byte-identical.
//
// Turns (extract_elt (load VecVT ptr), EltNo) into a narrow scalar load
// of just the extracted element, at ptr + element offset. Returns
// SDValue() when the transform is unsafe or unprofitable (non-byte-sized
// elements, illegal scalar load, target policy via shouldReduceLoadWidth,
// or a slow/unsupported memory access).
12329 const SDLoc &DL,
12330 EVT InVecVT, SDValue EltNo,
12331 LoadSDNode *OriginalLoad,
12332 SelectionDAG &DAG) const {
// Only non-atomic, non-volatile loads may be narrowed.
12333 assert(OriginalLoad->isSimple());
12334
12335 EVT VecEltVT = InVecVT.getVectorElementType();
12336
12337 // If the vector element type is not a multiple of a byte then we are unable
12338 // to correctly compute an address to load only the extracted element as a
12339 // scalar.
12340 if (!VecEltVT.isByteSized())
12341 return SDValue();
12342
12343 ISD::LoadExtType ExtTy =
12344 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12345 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12346 return SDValue();
12347
// For a constant index the exact byte offset (and thus a precise
// pointer-info and refined alignment) is known; otherwise only the
// address space and element-size-based alignment can be kept.
12348 std::optional<unsigned> ByteOffset;
12349 Align Alignment = OriginalLoad->getAlign();
12351 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12352 int Elt = ConstEltNo->getZExtValue();
12353 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12354 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12355 Alignment = commonAlignment(Alignment, *ByteOffset);
12356 } else {
12357 // Discard the pointer info except the address space because the memory
12358 // operand can't represent this new access since the offset is variable.
12359 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12360 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12361 }
12362
// Let the target veto narrowing this load.
12363 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12364 return SDValue();
12365
// Bail out unless the narrowed access is both allowed and fast.
12366 unsigned IsFast = 0;
12367 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12368 OriginalLoad->getAddressSpace(), Alignment,
12369 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12370 !IsFast)
12371 return SDValue();
12372
12373 SDValue NewPtr =
12374 getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12375
12376 // We are replacing a vector load with a scalar load. The new load must have
12377 // identical memory op ordering to the original.
12378 SDValue Load;
12379 if (ResultVT.bitsGT(VecEltVT)) {
12380 // If the result type of vextract is wider than the load, then issue an
12381 // extending load instead.
12382 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12384 : ISD::EXTLOAD;
12385 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12386 NewPtr, MPI, VecEltVT, Alignment,
12387 OriginalLoad->getMemOperand()->getFlags(),
12388 OriginalLoad->getAAInfo());
12389 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12390 } else {
12391 // The result type is narrower or the same width as the vector element
12392 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12393 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12394 OriginalLoad->getAAInfo());
12395 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Same bit-width but different type (e.g. int vs float) takes the
// bitcast path; a strictly narrower result is truncated.
12396 if (ResultVT.bitsLT(VecEltVT))
12397 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12398 else
12399 Load = DAG.getBitcast(ResultVT, Load);
12400 }
12401
12402 return Load;
12403}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1158
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
void changeSign()
Definition APFloat.h:1297
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
void negate()
Negate this APInt in place.
Definition APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1435
unsigned logBase2() const
Definition APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
void setAllBits()
Set every bit to 1.
Definition APInt.h:1319
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1417
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1442
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
iterator end() const
Definition StringRef.h:122
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
TargetLoweringBase(const TargetMachine &TM)
NOTE: The TargetMachine owns TLOF.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2038
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:557
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1743
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1569
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environment.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:294
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:179
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
KnownBits byteSwap() const
Definition KnownBits.h:507
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:282
KnownBits reverseBits() const
Definition KnownBits.h:511
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:226
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:314
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:98
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the top bits.
Definition KnownBits.h:160
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:279
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.