LLVM 22.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
46
47// Define the virtual destructor out-of-line for build efficiency.
49
50const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
51 return nullptr;
52}
53
57
58/// Check whether a given call node is in tail position within its function. If
59/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): this extract is missing the function header and the line that
// defines F (presumably the caller's Function, obtained from the
// MachineFunction) — verify against the upstream file.
61 SDValue &Chain) const {
63
64 // First, check if tail calls have been disabled in this function.
65 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
66 return false;
67
68 // Conservatively require the attributes of the call to match those of
69 // the return. Ignore following attributes because they don't affect the
70 // call sequence.
71 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs())
76 CallerAttrs.removeAttribute(Attr);
77
78 if (CallerAttrs.hasAttributes())
79 return false;
80
81 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): after the hasAttributes() early-return above, CallerAttrs
// holds no attributes, so these contains() checks can never be true — this
// looks like dead code; confirm intent against upstream.
82 if (CallerAttrs.contains(Attribute::ZExt) ||
83 CallerAttrs.contains(Attribute::SExt))
84 return false;
85
86 // Check if the only use is a function return node.
87 return isUsedByReturnOnly(Node, Chain);
88}
89
// Checks that every outgoing register argument placed in a caller-preserved
// (callee-saved) register carries exactly the value the caller itself
// received in that register: the outgoing value must be a CopyFromReg of the
// virtual register holding the function live-in for that same physical
// register. Returns false on the first mismatch, true if all such arguments
// match.
// NOTE(review): the first signature line (function name and, presumably, the
// MRI parameter used below) is missing from this extract — verify upstream.
91 const uint32_t *CallerPreservedMask,
92 const SmallVectorImpl<CCValAssign> &ArgLocs,
93 const SmallVectorImpl<SDValue> &OutVals) const {
94 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
95 const CCValAssign &ArgLoc = ArgLocs[I];
96 if (!ArgLoc.isRegLoc())
97 continue;
98 MCRegister Reg = ArgLoc.getLocReg();
99 // Only look at callee saved registers.
100 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
101 continue;
102 // Check that we pass the value used for the caller.
103 // (We look for a CopyFromReg reading a virtual register that is used
104 // for the function live-in value of register Reg)
105 SDValue Value = OutVals[I];
// Look through a zero-extension assertion wrapper to the underlying copy.
106 if (Value->getOpcode() == ISD::AssertZext)
107 Value = Value.getOperand(0);
108 if (Value->getOpcode() != ISD::CopyFromReg)
109 return false;
110 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
111 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
112 return false;
113 }
114 return true;
115}
116
117/// Set CallLoweringInfo attribute flags based on a call instruction
118/// and called function attributes.
// NOTE(review): the signature line (receiver and the Call parameter queried
// below) is missing from this extract — verify against upstream.
120 unsigned ArgIdx) {
// Mirror each relevant parameter attribute of the call into the
// corresponding boolean flag on this entry.
121 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
122 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
123 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
124 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
125 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
126 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
127 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
128 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
129 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
130 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
131 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
132 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
133 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
134 Alignment = Call->getParamStackAlign(ArgIdx);
135 IndirectType = nullptr;
// NOTE(review): the assert's condition is missing from this extract; only
// its message string survives below. Verify upstream.
137 "multiple ABI attributes?");
// For the pointer-indirection ABI attributes record the pointee type;
// byval additionally falls back to the parameter alignment when no explicit
// stack alignment was provided.
138 if (IsByVal) {
139 IndirectType = Call->getParamByValType(ArgIdx);
140 if (!Alignment)
141 Alignment = Call->getParamAlign(ArgIdx);
142 }
143 if (IsPreallocated)
144 IndirectType = Call->getParamPreallocatedType(ArgIdx);
145 if (IsInAlloca)
146 IndirectType = Call->getParamInAllocaType(ArgIdx);
147 if (IsSRet)
148 IndirectType = Call->getParamStructRetType(ArgIdx);
149}
150
151/// Generate a libcall taking the given operands as arguments and returning a
152/// result of type RetVT.
// Returns the pair (call result, output chain). If no input chain is given,
// the DAG entry node is used.
153std::pair<SDValue, SDValue>
154TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
// NOTE(review): the parameter line between these two (presumably the Ops
// array iterated below) is missing from this extract — verify upstream.
156 MakeLibCallOptions CallOptions,
157 const SDLoc &dl,
158 SDValue InChain) const {
159 if (!InChain)
160 InChain = DAG.getEntryNode();
161
// NOTE(review): the declaration of Args (the argument list reserved and
// filled below) is missing from this extract.
163 Args.reserve(Ops.size());
164
// Build one ArgListEntry per operand; a per-operand type override takes
// precedence over the type derived from the operand's value type.
165 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
166 for (unsigned i = 0; i < Ops.size(); ++i) {
167 SDValue NewOp = Ops[i];
168 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
169 ? OpsTypeOverrides[i]
170 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
171 TargetLowering::ArgListEntry Entry(NewOp, Ty);
172 if (CallOptions.IsSoften)
173 Entry.OrigTy =
174 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
175
176 Entry.IsSExt =
177 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
178 Entry.IsZExt = !Entry.IsSExt;
179
// NOTE(review): the second half of this condition (line 181 upstream) is
// missing from this extract.
180 if (CallOptions.IsSoften &&
182 Entry.IsSExt = Entry.IsZExt = false;
183 }
184 Args.push_back(Entry);
185 }
186
187 const char *LibcallName = getLibcallName(LC);
188 if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
189 reportFatalInternalError("unsupported library call operation");
190
191 SDValue Callee =
192 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
193
194 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
195 Type *OrigRetTy = RetTy;
// NOTE(review): a line is missing here (196 upstream); presumably the
// CallLoweringInfo CLI used below is declared in the elided region.
197 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
198 bool zeroExtend = !signExtend;
199
200 if (CallOptions.IsSoften) {
201 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
// NOTE(review): the condition guarding this reset (line 202 upstream) is
// missing from this extract.
203 signExtend = zeroExtend = false;
204 }
205
206 CLI.setDebugLoc(dl)
207 .setChain(InChain)
208 .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
209 std::move(Args))
210 .setNoReturn(CallOptions.DoesNotReturn)
// NOTE(review): two chained setter lines (211-212 upstream) are missing.
213 .setSExtResult(signExtend)
214 .setZExtResult(zeroExtend);
215 return LowerCallTo(CLI);
216}
217
// Chooses a sequence of value types (appended to MemOps) used to lower a
// memory operation of Op.size() bytes, returning false if more than Limit
// operations would be needed or the op is otherwise rejected.
// NOTE(review): the function header line is missing from this extract —
// verify the name/return type against upstream.
219 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
220 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
221 const AttributeList &FuncAttributes) const {
222 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
223 Op.getSrcAlign() < Op.getDstAlign())
224 return false;
225
226 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
227
228 if (VT == MVT::Other) {
229 // Use the largest integer type whose alignment constraints are satisfied.
230 // We only need to check DstAlign here as SrcAlign is always greater or
231 // equal to DstAlign (or zero).
232 VT = MVT::LAST_INTEGER_VALUETYPE;
233 if (Op.isFixedDstAlign())
234 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
235 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
// NOTE(review): the loop body decrementing VT (line 236 upstream) is missing
// from this extract.
237 assert(VT.isInteger());
238
239 // Find the largest legal integer type.
240 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
241 while (!isTypeLegal(LVT))
242 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
243 assert(LVT.isInteger());
244
245 // If the type we've chosen is larger than the largest legal integer type
246 // then use that instead.
247 if (VT.bitsGT(LVT))
248 VT = LVT;
249 }
250
251 unsigned NumMemOps = 0;
252 uint64_t Size = Op.size();
253 while (Size) {
254 unsigned VTSize = VT.getSizeInBits() / 8;
255 while (VTSize > Size) {
256 // For now, only use non-vector load / store's for the left-over pieces.
257 EVT NewVT = VT;
258 unsigned NewVTSize;
259
260 bool Found = false;
261 if (VT.isVector() || VT.isFloatingPoint()) {
262 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
// NOTE(review): the second conjunct of this condition (line 264 upstream)
// is missing from this extract.
263 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
265 Found = true;
266 else if (NewVT == MVT::i64 &&
267 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
268 isSafeMemOpType(MVT::f64)) {
269 // i64 is usually not legal on 32-bit targets, but f64 may be.
270 NewVT = MVT::f64;
271 Found = true;
272 }
273 }
274
// Fall back to scanning downward through the simple integer types until a
// safe memop type (or i8) is reached.
275 if (!Found) {
276 do {
277 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
278 if (NewVT == MVT::i8)
279 break;
280 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
281 }
282 NewVTSize = NewVT.getSizeInBits() / 8;
283
284 // If the new VT cannot cover all of the remaining bits, then consider
285 // issuing a (or a pair of) unaligned and overlapping load / store.
286 unsigned Fast;
// NOTE(review): parts of this condition (lines 288 and 290 upstream —
// presumably the allowsMisalignedMemoryAccesses call wrapping these
// arguments) are missing from this extract.
287 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
289 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
291 Fast)
292 VTSize = Size;
293 else {
294 VT = NewVT;
295 VTSize = NewVTSize;
296 }
297 }
298
299 if (++NumMemOps > Limit)
300 return false;
301
302 MemOps.push_back(VT);
303 Size -= VTSize;
304 }
305
306 return true;
307}
308
309/// Soften the operands of a comparison. This code is shared among BR_CC,
310/// SELECT_CC, and SETCC handlers.
// Chain-less convenience overload: forwards to the full overload below with a
// default-constructed (null) chain.
// NOTE(review): the function header line is missing from this extract.
312 SDValue &NewLHS, SDValue &NewRHS,
313 ISD::CondCode &CCCode,
314 const SDLoc &dl, const SDValue OldLHS,
315 const SDValue OldRHS) const {
316 SDValue Chain;
317 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
318 OldRHS, Chain);
319}
320
// Full soft-float setcc expansion: maps the FP condition code to one or two
// comparison libcalls (per the value type f32/f64/f128/ppcf128), emits them,
// and rewrites NewLHS/NewRHS/CCCode into an integer comparison against the
// libcall result(s). Chain is updated with the call chain(s).
// NOTE(review): the function header line is missing from this extract.
322 SDValue &NewLHS, SDValue &NewRHS,
323 ISD::CondCode &CCCode,
324 const SDLoc &dl, const SDValue OldLHS,
325 const SDValue OldRHS,
326 SDValue &Chain,
327 bool IsSignaling) const {
328 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
329 // not supporting it. We can update this code when libgcc provides such
330 // functions.
331
332 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
333 && "Unsupported setcc type!");
334
335 // Expand into one or more soft-fp libcall(s).
336 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
337 bool ShouldInvertCC = false;
338 switch (CCCode) {
339 case ISD::SETEQ:
340 case ISD::SETOEQ:
341 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
342 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
343 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
344 break;
345 case ISD::SETNE:
346 case ISD::SETUNE:
347 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
348 (VT == MVT::f64) ? RTLIB::UNE_F64 :
349 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
350 break;
351 case ISD::SETGE:
352 case ISD::SETOGE:
353 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
354 (VT == MVT::f64) ? RTLIB::OGE_F64 :
355 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
356 break;
357 case ISD::SETLT:
358 case ISD::SETOLT:
359 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
360 (VT == MVT::f64) ? RTLIB::OLT_F64 :
361 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
362 break;
363 case ISD::SETLE:
364 case ISD::SETOLE:
365 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
366 (VT == MVT::f64) ? RTLIB::OLE_F64 :
367 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
368 break;
369 case ISD::SETGT:
370 case ISD::SETOGT:
371 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
372 (VT == MVT::f64) ? RTLIB::OGT_F64 :
373 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
374 break;
375 case ISD::SETO:
376 ShouldInvertCC = true;
377 [[fallthrough]];
378 case ISD::SETUO:
379 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
380 (VT == MVT::f64) ? RTLIB::UO_F64 :
381 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
382 break;
383 case ISD::SETONE:
384 // SETONE = O && UNE
385 ShouldInvertCC = true;
386 [[fallthrough]];
387 case ISD::SETUEQ:
388 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
389 (VT == MVT::f64) ? RTLIB::UO_F64 :
390 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
391 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
392 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
393 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
394 break;
395 default:
396 // Invert CC for unordered comparisons
397 ShouldInvertCC = true;
398 switch (CCCode) {
399 case ISD::SETULT:
400 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
401 (VT == MVT::f64) ? RTLIB::OGE_F64 :
402 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
403 break;
404 case ISD::SETULE:
405 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
406 (VT == MVT::f64) ? RTLIB::OGT_F64 :
407 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
408 break;
409 case ISD::SETUGT:
410 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
411 (VT == MVT::f64) ? RTLIB::OLE_F64 :
412 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
413 break;
414 case ISD::SETUGE:
415 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
416 (VT == MVT::f64) ? RTLIB::OLT_F64 :
417 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
418 break;
419 default: llvm_unreachable("Do not know how to soften this setcc!");
420 }
421 }
422
423 // Use the target specific return value for comparison lib calls.
// NOTE(review): the declarations of RetVT and CallOptions (lines 424 and 426
// upstream) are missing from this extract.
425 SDValue Ops[2] = {NewLHS, NewRHS};
427 EVT OpsVT[2] = { OldLHS.getValueType(),
428 OldRHS.getValueType() };
429 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
430 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
431 NewLHS = Call.first;
432 NewRHS = DAG.getConstant(0, dl, RetVT);
433
434 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
// NOTE(review): the first line of the error-reporting call inside this block
// (line 436 upstream) is missing; only its message argument survives.
435 if (LC1Impl == RTLIB::Unsupported) {
437 "no libcall available to soften floating-point compare");
438 }
439
440 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
441 if (ShouldInvertCC) {
442 assert(RetVT.isInteger());
443 CCCode = getSetCCInverse(CCCode, RetVT);
444 }
445
446 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
447 // Update Chain.
448 Chain = Call.second;
449 } else {
450 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
// NOTE(review): as above, line 452 upstream (the error-reporting call) is
// missing here.
451 if (LC2Impl == RTLIB::Unsupported) {
453 "no libcall available to soften floating-point compare");
454 }
455
456 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
457 "unordered call should be simple boolean");
458
459 EVT SetCCVT =
460 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
// NOTE(review): line 461 upstream is missing and the brace structure from
// here to the end of the function appears garbled by the extract (SetCCVT,
// LC2Impl and Call2 are used after what reads as the end of this else
// block). Verify the block layout against upstream.
462 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
463 DAG.getValueType(MVT::i1));
464 }
465
466 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
467 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
468 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
469 if (ShouldInvertCC)
470 CCCode = getSetCCInverse(CCCode, RetVT);
471 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
472 if (Chain)
473 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
474 Call2.second);
475 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
476 Tmp.getValueType(), Tmp, NewLHS);
477 NewRHS = SDValue();
478 }
479}
480
481/// Return the entry encoding for a jump table in the current function. The
482/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): nearly all code of this function (the signature and both
// return statements) is missing from this extract; only its comments
// survive. Reconstruct from upstream before relying on it.
484 // In non-pic modes, just use the address of a block.
487
488 // Otherwise, use a label difference.
490}
491
// Default PIC jump-table relocation base: the jump table address itself is
// returned unchanged; targets override for other addressing schemes.
// NOTE(review): the function header line (name and the Table parameter) is
// missing from this extract.
493 SelectionDAG &DAG) const {
494 return Table;
495}
496
497/// This returns the relocation base for the given PIC jumptable, the same as
498/// getPICJumpTableRelocBase, but as an MCExpr.
499const MCExpr *
// NOTE(review): the signature line carrying the function name and the MF
// parameter (line 500 upstream) is missing from this extract.
501 unsigned JTI,MCContext &Ctx) const{
502 // The normal PIC reloc base is the label at the start of the jump table.
503 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
504}
505
// Expands an indirect jump-table branch into a BRIND node, optionally
// threading jump-table debug info onto the chain first.
// NOTE(review): the function header line and the condition guarding the
// debug-info emission (lines 506 and 511 upstream) are missing from this
// extract; the surviving comment says it is CodeView-only.
507 SDValue Addr, int JTI,
508 SelectionDAG &DAG) const {
509 SDValue Chain = Value;
510 // Jump table debug info is only needed if CodeView is enabled.
512 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
513 }
514 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
515}
516
// Returns true when a global address plus constant offset can be folded into
// a single addressing computation: the global must be assumed DSO-local and
// the code must not require a PIC base register.
517bool
// NOTE(review): the signature line (function name and the GA parameter) is
// missing from this extract.
519 const TargetMachine &TM = getTargetMachine();
520 const GlobalValue *GV = GA->getGlobal();
521
522 // If the address is not even local to this DSO we will have to load it from
523 // a got and then add the offset.
524 if (!TM.shouldAssumeDSOLocal(GV))
525 return false;
526
527 // If the code is position independent we will have to add a base register.
// NOTE(review): the condition line guarding this return (line 528 upstream)
// is missing from this extract.
529 return false;
530
531 // Otherwise we can do it.
532 return true;
533}
534
535//===----------------------------------------------------------------------===//
536// Optimization Methods
537//===----------------------------------------------------------------------===//
538
539/// If the specified instruction has a constant integer operand and there are
540/// bits set in that constant that are not demanded, then clear those bits and
541/// return true.
// NOTE(review): the function header line (name and the Op parameter) is
// missing from this extract.
543 const APInt &DemandedBits,
544 const APInt &DemandedElts,
545 TargetLoweringOpt &TLO) const {
546 SDLoc DL(Op);
547 unsigned Opcode = Op.getOpcode();
548
549 // Early-out if we've ended up calling an undemanded node, leave this to
550 // constant folding.
551 if (DemandedBits.isZero() || DemandedElts.isZero())
552 return false;
553
554 // Do target-specific constant optimization.
555 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
556 return TLO.New.getNode();
557
558 // FIXME: ISD::SELECT, ISD::SELECT_CC
559 switch (Opcode) {
560 default:
561 break;
562 case ISD::XOR:
563 case ISD::AND:
564 case ISD::OR: {
565 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
566 if (!Op1C || Op1C->isOpaque())
567 return false;
568
569 // If this is a 'not' op, don't touch it because that's a canonical form.
570 const APInt &C = Op1C->getAPIntValue();
571 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
572 return false;
573
// Constant has bits outside the demanded set: rebuild the node with the
// constant masked down to only the demanded bits.
574 if (!C.isSubsetOf(DemandedBits)) {
575 EVT VT = Op.getValueType();
576 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
577 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
578 Op->getFlags());
579 return TLO.CombineTo(Op, NewOp);
580 }
581
582 break;
583 }
584 }
585
586 return false;
587}
588
// Convenience overload: derives an all-ones demanded-elements mask from the
// value type and forwards to the full overload above.
// NOTE(review): the function header line and the vector arm of this ternary
// (lines 589 and 594 upstream) are missing from this extract.
590 const APInt &DemandedBits,
591 TargetLoweringOpt &TLO) const {
592 EVT VT = Op.getValueType();
593 APInt DemandedElts = VT.isVector()
595 : APInt(1, 1);
596 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
597}
598
599/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
600/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
601/// but it could be generalized for targets with other types of implicit
602/// widening casts.
// NOTE(review): the function header line (name, Op and — judging by its use
// below — a BitWidth parameter) is missing from this extract.
604 const APInt &DemandedBits,
605 TargetLoweringOpt &TLO) const {
606 assert(Op.getNumOperands() == 2 &&
607 "ShrinkDemandedOp only supports binary operators!");
608 assert(Op.getNode()->getNumValues() == 1 &&
609 "ShrinkDemandedOp only supports nodes with one result!");
610
611 EVT VT = Op.getValueType();
612 SelectionDAG &DAG = TLO.DAG;
613 SDLoc dl(Op);
614
615 // Early return, as this function cannot handle vector types.
616 if (VT.isVector())
617 return false;
618
619 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
620 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
621 "ShrinkDemandedOp only supports operands that have the same size!");
622
623 // Don't do this if the node has another user, which may require the
624 // full value.
625 if (!Op.getNode()->hasOneUse())
626 return false;
627
628 // Search for the smallest integer type with free casts to and from
629 // Op's type. For expedience, just check power-of-2 integer types.
630 unsigned DemandedSize = DemandedBits.getActiveBits();
631 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
632 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
633 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
634 if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
635 // We found a type with free casts.
636
637 // If the operation has the 'disjoint' flag, then the
638 // operands on the new node are also disjoint.
// NOTE(review): the second arm of this ternary (line 640 upstream,
// presumably the no-flags alternative) is missing from this extract.
639 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
641 SDValue X = DAG.getNode(
642 Op.getOpcode(), dl, SmallVT,
643 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
644 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
645 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
646 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
647 return TLO.CombineTo(Op, Z);
648 }
649 }
650 return false;
651}
652
// DAGCombiner-facing wrapper: runs SimplifyDemandedBits with a
// TargetLoweringOpt built from the combiner's legalization state and, on
// success, re-queues the node.
// NOTE(review): the function header line and the statement committing TLO
// back to the combiner (lines 653 and 663 upstream) are missing from this
// extract.
654 DAGCombinerInfo &DCI) const {
655 SelectionDAG &DAG = DCI.DAG;
656 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
657 !DCI.isBeforeLegalizeOps());
658 KnownBits Known;
659
660 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
661 if (Simplified) {
662 DCI.AddToWorklist(Op.getNode());
664 }
665 return Simplified;
666}
667
// Same combiner-facing wrapper as above, but with an explicit
// demanded-elements mask.
// NOTE(review): the function header line and the TLO-commit statement (lines
// 668 and 680 upstream) are missing from this extract.
669 const APInt &DemandedElts,
670 DAGCombinerInfo &DCI) const {
671 SelectionDAG &DAG = DCI.DAG;
672 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
673 !DCI.isBeforeLegalizeOps());
674 KnownBits Known;
675
676 bool Simplified =
677 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
678 if (Simplified) {
679 DCI.AddToWorklist(Op.getNode());
681 }
682 return Simplified;
683}
684
// Wrapper that synthesizes the demanded-elements mask (all lanes for
// fixed-length vectors, a single implicit broadcast bit otherwise) before
// calling the full SimplifyDemandedBits overload.
// NOTE(review): the function header line, the TLO parameter line, and the
// vector arm of the DemandedElts ternary (lines 685, 687 and 696 upstream)
// are missing from this extract.
686 KnownBits &Known,
688 unsigned Depth,
689 bool AssumeSingleUse) const {
690 EVT VT = Op.getValueType();
691
692 // Since the number of lanes in a scalable vector is unknown at compile time,
693 // we track one bit which is implicitly broadcast to all lanes. This means
694 // that all lanes in a scalable vector are considered demanded.
695 APInt DemandedElts = VT.isFixedLengthVector()
697 : APInt(1, 1);
698 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
699 AssumeSingleUse);
700}
701
702// TODO: Under what circumstances can we create nodes? Constant folding?
// Attempts to look through Op — without creating new nodes — and return an
// existing value that produces the same demanded bits/elements, so multi-use
// nodes can be bypassed. Returns an empty SDValue when nothing simpler is
// found.
// NOTE(review): the function header line (name/return type) is missing from
// this extract, as are several interior lines flagged below.
704 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
705 SelectionDAG &DAG, unsigned Depth) const {
706 EVT VT = Op.getValueType();
707
708 // Limit search depth.
// NOTE(review): the depth-limit condition (line 709 upstream) is missing.
710 return SDValue();
711
712 // Ignore UNDEFs.
713 if (Op.isUndef())
714 return SDValue();
715
716 // Not demanding any bits/elts from Op.
717 if (DemandedBits == 0 || DemandedElts == 0)
718 return DAG.getUNDEF(VT);
719
720 bool IsLE = DAG.getDataLayout().isLittleEndian();
721 unsigned NumElts = DemandedElts.getBitWidth();
722 unsigned BitWidth = DemandedBits.getBitWidth();
723 KnownBits LHSKnown, RHSKnown;
724 switch (Op.getOpcode()) {
725 case ISD::BITCAST: {
726 if (VT.isScalableVector())
727 return SDValue();
728
729 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
730 EVT SrcVT = Src.getValueType();
731 EVT DstVT = Op.getValueType();
732 if (SrcVT == DstVT)
733 return Src;
734
735 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
736 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
// NOTE(review): the recursive-call lines opening these three if-bodies
// (lines 738, 759 and 777 upstream) are missing from this extract; only
// their argument and return lines survive.
737 if (NumSrcEltBits == NumDstEltBits)
739 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
740 return DAG.getBitcast(DstVT, V);
741
// Wide destination elements: translate demanded dst bits/elts into the
// finer-grained source element space.
742 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
743 unsigned Scale = NumDstEltBits / NumSrcEltBits;
744 unsigned NumSrcElts = SrcVT.getVectorNumElements();
745 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
746 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
747 for (unsigned i = 0; i != Scale; ++i) {
748 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
749 unsigned BitOffset = EltOffset * NumSrcEltBits;
750 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
751 if (!Sub.isZero()) {
752 DemandedSrcBits |= Sub;
753 for (unsigned j = 0; j != NumElts; ++j)
754 if (DemandedElts[j])
755 DemandedSrcElts.setBit((j * Scale) + i);
756 }
757 }
758
760 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
761 return DAG.getBitcast(DstVT, V);
762 }
763
764 // TODO - bigendian once we have test coverage.
// Narrow destination elements: map each demanded dst element onto the
// coarser source element that contains it.
765 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
766 unsigned Scale = NumSrcEltBits / NumDstEltBits;
767 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
768 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
769 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
770 for (unsigned i = 0; i != NumElts; ++i)
771 if (DemandedElts[i]) {
772 unsigned Offset = (i % Scale) * NumDstEltBits;
773 DemandedSrcBits.insertBits(DemandedBits, Offset);
774 DemandedSrcElts.setBit(i / Scale);
775 }
776
778 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
779 return DAG.getBitcast(DstVT, V);
780 }
781
782 break;
783 }
784 case ISD::AND: {
785 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
786 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
787
788 // If all of the demanded bits are known 1 on one side, return the other.
789 // These bits cannot contribute to the result of the 'and' in this
790 // context.
791 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
792 return Op.getOperand(0);
793 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
794 return Op.getOperand(1);
795 break;
796 }
797 case ISD::OR: {
798 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
799 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
800
801 // If all of the demanded bits are known zero on one side, return the
802 // other. These bits cannot contribute to the result of the 'or' in this
803 // context.
804 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
805 return Op.getOperand(0);
806 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
807 return Op.getOperand(1);
808 break;
809 }
810 case ISD::XOR: {
811 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
812 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
813
814 // If all of the demanded bits are known zero on one side, return the
815 // other.
816 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
817 return Op.getOperand(0);
818 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
819 return Op.getOperand(1);
820 break;
821 }
822 case ISD::ADD: {
823 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
824 if (RHSKnown.isZero())
825 return Op.getOperand(0);
826
827 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
828 if (LHSKnown.isZero())
829 return Op.getOperand(1);
830 break;
831 }
832 case ISD::SHL: {
833 // If we are only demanding sign bits then we can use the shift source
834 // directly.
835 if (std::optional<unsigned> MaxSA =
836 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
837 SDValue Op0 = Op.getOperand(0);
838 unsigned ShAmt = *MaxSA;
839 unsigned NumSignBits =
840 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
841 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
842 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
843 return Op0;
844 }
845 break;
846 }
847 case ISD::SRL: {
848 // If we are only demanding sign bits then we can use the shift source
849 // directly.
850 if (std::optional<unsigned> MaxSA =
851 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
852 SDValue Op0 = Op.getOperand(0);
853 unsigned ShAmt = *MaxSA;
854 // Must already be signbits in DemandedBits bounds, and can't demand any
855 // shifted in zeroes.
856 if (DemandedBits.countl_zero() >= ShAmt) {
857 unsigned NumSignBits =
858 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
859 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
860 return Op0;
861 }
862 }
863 break;
864 }
865 case ISD::SETCC: {
866 SDValue Op0 = Op.getOperand(0);
867 SDValue Op1 = Op.getOperand(1);
868 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
869 // If (1) we only need the sign-bit, (2) the setcc operands are the same
870 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
871 // -1, we may be able to bypass the setcc.
// NOTE(review): parts of this condition (lines 873-875 upstream) are
// missing from this extract.
872 if (DemandedBits.isSignMask() &&
876 // If we're testing X < 0, then this compare isn't needed - just use X!
877 // FIXME: We're limiting to integer types here, but this should also work
878 // if we don't care about FP signed-zero. The use of SETLT with FP means
879 // that we don't care about NaNs.
// NOTE(review): the second conjunct of this condition (line 881 upstream)
// is missing.
880 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
882 return Op0;
883 }
884 break;
885 }
// NOTE(review): the case label for this block (line 886 upstream,
// presumably the sext-in-reg opcode given the comment) is missing.
887 // If none of the extended bits are demanded, eliminate the sextinreg.
888 SDValue Op0 = Op.getOperand(0);
889 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
890 unsigned ExBits = ExVT.getScalarSizeInBits();
// NOTE(review): the second conjunct of this condition (line 892 upstream)
// is missing.
891 if (DemandedBits.getActiveBits() <= ExBits &&
893 return Op0;
894 // If the input is already sign extended, just drop the extension.
895 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
896 if (NumSignBits >= (BitWidth - ExBits + 1))
897 return Op0;
898 break;
899 }
// NOTE(review): the case labels opening this block (lines 900-902 upstream)
// are missing from this extract.
903 if (VT.isScalableVector())
904 return SDValue();
905
906 // If we only want the lowest element and none of extended bits, then we can
907 // return the bitcasted source vector.
908 SDValue Src = Op.getOperand(0);
909 EVT SrcVT = Src.getValueType();
910 EVT DstVT = Op.getValueType();
911 if (IsLE && DemandedElts == 1 &&
912 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
913 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
914 return DAG.getBitcast(DstVT, Src);
915 }
916 break;
917 }
// NOTE(review): the case label opening this block (line 918 upstream) is
// missing; the body handles skipping an undemanded inserted element.
919 if (VT.isScalableVector())
920 return SDValue();
921
922 // If we don't demand the inserted element, return the base vector.
923 SDValue Vec = Op.getOperand(0);
924 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
925 EVT VecVT = Vec.getValueType();
926 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
927 !DemandedElts[CIdx->getZExtValue()])
928 return Vec;
929 break;
930 }
// NOTE(review): the case label opening this block (line 931 upstream) is
// missing; the body handles skipping an undemanded inserted subvector.
932 if (VT.isScalableVector())
933 return SDValue();
934
935 SDValue Vec = Op.getOperand(0);
936 SDValue Sub = Op.getOperand(1);
937 uint64_t Idx = Op.getConstantOperandVal(2);
938 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
939 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
940 // If we don't demand the inserted subvector, return the base vector.
941 if (DemandedSubElts == 0)
942 return Vec;
943 break;
944 }
945 case ISD::VECTOR_SHUFFLE: {
// NOTE(review): line 946 upstream is missing from this extract.
947 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
948
949 // If all the demanded elts are from one operand and are inline,
950 // then we can use the operand directly.
951 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
952 for (unsigned i = 0; i != NumElts; ++i) {
953 int M = ShuffleMask[i];
954 if (M < 0 || !DemandedElts[i])
955 continue;
956 AllUndef = false;
957 IdentityLHS &= (M == (int)i);
958 IdentityRHS &= ((M - NumElts) == i);
959 }
960
961 if (AllUndef)
962 return DAG.getUNDEF(Op.getValueType());
963 if (IdentityLHS)
964 return Op.getOperand(0);
965 if (IdentityRHS)
966 return Op.getOperand(1);
967 break;
968 }
969 default:
970 // TODO: Probably okay to remove after audit; here to reduce change size
971 // in initial enablement patch for scalable vectors
972 if (VT.isScalableVector())
973 return SDValue();
974
// NOTE(review): the target-hook call opening this if (line 976 upstream) is
// missing from this extract.
975 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
977 Op, DemandedBits, DemandedElts, DAG, Depth))
978 return V;
979 break;
980 }
981 return SDValue();
982}
983
// Wrapper that synthesizes the demanded-elements mask from the value type
// before delegating to the full SimplifyMultipleUseDemandedBits overload.
// NOTE(review): the signature lines (984-985 upstream) and the vector arm of
// the DemandedElts ternary (line 992 upstream) are missing from this
// extract.
986 unsigned Depth) const {
987 EVT VT = Op.getValueType();
988 // Since the number of lanes in a scalable vector is unknown at compile time,
989 // we track one bit which is implicitly broadcast to all lanes. This means
990 // that all lanes in a scalable vector are considered demanded.
991 APInt DemandedElts = VT.isFixedLengthVector()
993 : APInt(1, 1);
994 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
995 Depth);
996}
997
// Wrapper that demands all bits of Op's scalar type and delegates to the
// full SimplifyMultipleUseDemandedBits overload.
// NOTE(review): the function header line (998 upstream) is missing from this
// extract.
999 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1000 unsigned Depth) const {
1001 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1002 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1003 Depth);
1004}
1005
1006// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1007// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
1010 const TargetLowering &TLI,
1011 const APInt &DemandedBits,
1012 const APInt &DemandedElts, unsigned Depth) {
1013 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1014 "SRL or SRA node is required here!");
1015 // Is the right shift using an immediate value of 1?
1016 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1017 if (!N1C || !N1C->isOne())
1018 return SDValue();
1019
1020 // We are looking for an avgfloor
1021 // add(ext, ext)
1022 // or one of these as a avgceil
1023 // add(add(ext, ext), 1)
1024 // add(add(ext, 1), ext)
1025 // add(ext, add(ext, 1))
1026 SDValue Add = Op.getOperand(0);
1027 if (Add.getOpcode() != ISD::ADD)
1028 return SDValue();
1029
1030 SDValue ExtOpA = Add.getOperand(0);
1031 SDValue ExtOpB = Add.getOperand(1);
1032 SDValue Add2;
1033 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1034 ConstantSDNode *ConstOp;
1035 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1036 ConstOp->isOne()) {
1037 ExtOpA = Op1;
1038 ExtOpB = Op3;
1039 Add2 = A;
1040 return true;
1041 }
1042 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1043 ConstOp->isOne()) {
1044 ExtOpA = Op1;
1045 ExtOpB = Op2;
1046 Add2 = A;
1047 return true;
1048 }
1049 return false;
1050 };
1051 bool IsCeil =
1052 (ExtOpA.getOpcode() == ISD::ADD &&
1053 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1054 (ExtOpB.getOpcode() == ISD::ADD &&
1055 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1056
1057 // If the shift is signed (sra):
1058 // - Needs >= 2 sign bit for both operands.
1059 // - Needs >= 2 zero bits.
1060 // If the shift is unsigned (srl):
1061 // - Needs >= 1 zero bit for both operands.
1062 // - Needs 1 demanded bit zero and >= 2 sign bits.
1063 SelectionDAG &DAG = TLO.DAG;
1064 unsigned ShiftOpc = Op.getOpcode();
1065 bool IsSigned = false;
1066 unsigned KnownBits;
1067 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1068 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1069 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1070 unsigned NumZeroA =
1071 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1072 unsigned NumZeroB =
1073 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1074 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1075
1076 switch (ShiftOpc) {
1077 default:
1078 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1079 case ISD::SRA: {
1080 if (NumZero >= 2 && NumSigned < NumZero) {
1081 IsSigned = false;
1082 KnownBits = NumZero;
1083 break;
1084 }
1085 if (NumSigned >= 1) {
1086 IsSigned = true;
1087 KnownBits = NumSigned;
1088 break;
1089 }
1090 return SDValue();
1091 }
1092 case ISD::SRL: {
1093 if (NumZero >= 1 && NumSigned < NumZero) {
1094 IsSigned = false;
1095 KnownBits = NumZero;
1096 break;
1097 }
1098 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1099 IsSigned = true;
1100 KnownBits = NumSigned;
1101 break;
1102 }
1103 return SDValue();
1104 }
1105 }
1106
1107 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1108 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1109
1110 // Find the smallest power-2 type that is legal for this vector size and
1111 // operation, given the original type size and the number of known sign/zero
1112 // bits.
1113 EVT VT = Op.getValueType();
1114 unsigned MinWidth =
1115 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1116 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1118 return SDValue();
1119 if (VT.isVector())
1120 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1121 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1122 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1123 // larger type size to do the transform.
1124 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1125 return SDValue();
1126 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1127 Add.getOperand(1)) &&
1128 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1129 Add2.getOperand(1))))
1130 NVT = VT;
1131 else
1132 return SDValue();
1133 }
1134
1135 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1136 // this is likely to stop other folds (reassociation, value tracking etc.)
1137 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1138 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1139 return SDValue();
1140
1141 SDLoc DL(Op);
1142 SDValue ResultAVG =
1143 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1144 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1145 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1146}
1147
1148/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1149/// result of Op are ever used downstream. If we can use this information to
1150/// simplify Op, create a new simplified DAG node and return true, returning the
1151/// original and new nodes in Old and New. Otherwise, analyze the expression and
1152/// return a mask of Known bits for the expression (used to simplify the
1153/// caller). The Known bits may only be accurate for those bits in the
1154/// OriginalDemandedBits and OriginalDemandedElts.
1156 SDValue Op, const APInt &OriginalDemandedBits,
1157 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1158 unsigned Depth, bool AssumeSingleUse) const {
1159 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1160 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1161 "Mask size mismatches value type size!");
1162
1163 // Don't know anything.
1164 Known = KnownBits(BitWidth);
1165
1166 EVT VT = Op.getValueType();
1167 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1168 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1169 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1170 "Unexpected vector size");
1171
1172 APInt DemandedBits = OriginalDemandedBits;
1173 APInt DemandedElts = OriginalDemandedElts;
1174 SDLoc dl(Op);
1175
1176 // Undef operand.
1177 if (Op.isUndef())
1178 return false;
1179
1180 // We can't simplify target constants.
1181 if (Op.getOpcode() == ISD::TargetConstant)
1182 return false;
1183
1184 if (Op.getOpcode() == ISD::Constant) {
1185 // We know all of the bits for a constant!
1186 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1187 return false;
1188 }
1189
1190 if (Op.getOpcode() == ISD::ConstantFP) {
1191 // We know all of the bits for a floating point constant!
1193 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1194 return false;
1195 }
1196
1197 // Other users may use these bits.
1198 bool HasMultiUse = false;
1199 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1201 // Limit search depth.
1202 return false;
1203 }
1204 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1206 DemandedElts = APInt::getAllOnes(NumElts);
1207 HasMultiUse = true;
1208 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1209 // Not demanding any bits/elts from Op.
1210 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1211 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1212 // Limit search depth.
1213 return false;
1214 }
1215
1216 KnownBits Known2;
1217 switch (Op.getOpcode()) {
1218 case ISD::SCALAR_TO_VECTOR: {
1219 if (VT.isScalableVector())
1220 return false;
1221 if (!DemandedElts[0])
1222 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1223
1224 KnownBits SrcKnown;
1225 SDValue Src = Op.getOperand(0);
1226 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1227 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1228 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1229 return true;
1230
1231 // Upper elements are undef, so only get the knownbits if we just demand
1232 // the bottom element.
1233 if (DemandedElts == 1)
1234 Known = SrcKnown.anyextOrTrunc(BitWidth);
1235 break;
1236 }
1237 case ISD::BUILD_VECTOR:
1238 // Collect the known bits that are shared by every demanded element.
1239 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1240 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1241 return false; // Don't fall through, will infinitely loop.
1242 case ISD::SPLAT_VECTOR: {
1243 SDValue Scl = Op.getOperand(0);
1244 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1245 KnownBits KnownScl;
1246 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1247 return true;
1248
1249 // Implicitly truncate the bits to match the official semantics of
1250 // SPLAT_VECTOR.
1251 Known = KnownScl.trunc(BitWidth);
1252 break;
1253 }
1254 case ISD::LOAD: {
1255 auto *LD = cast<LoadSDNode>(Op);
1256 if (getTargetConstantFromLoad(LD)) {
1257 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1258 return false; // Don't fall through, will infinitely loop.
1259 }
1260 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1261 // If this is a ZEXTLoad and we are looking at the loaded value.
1262 EVT MemVT = LD->getMemoryVT();
1263 unsigned MemBits = MemVT.getScalarSizeInBits();
1264 Known.Zero.setBitsFrom(MemBits);
1265 return false; // Don't fall through, will infinitely loop.
1266 }
1267 break;
1268 }
1270 if (VT.isScalableVector())
1271 return false;
1272 SDValue Vec = Op.getOperand(0);
1273 SDValue Scl = Op.getOperand(1);
1274 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1275 EVT VecVT = Vec.getValueType();
1276
1277 // If index isn't constant, assume we need all vector elements AND the
1278 // inserted element.
1279 APInt DemandedVecElts(DemandedElts);
1280 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1281 unsigned Idx = CIdx->getZExtValue();
1282 DemandedVecElts.clearBit(Idx);
1283
1284 // Inserted element is not required.
1285 if (!DemandedElts[Idx])
1286 return TLO.CombineTo(Op, Vec);
1287 }
1288
1289 KnownBits KnownScl;
1290 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1291 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1292 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1293 return true;
1294
1295 Known = KnownScl.anyextOrTrunc(BitWidth);
1296
1297 KnownBits KnownVec;
1298 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1299 Depth + 1))
1300 return true;
1301
1302 if (!!DemandedVecElts)
1303 Known = Known.intersectWith(KnownVec);
1304
1305 return false;
1306 }
1307 case ISD::INSERT_SUBVECTOR: {
1308 if (VT.isScalableVector())
1309 return false;
1310 // Demand any elements from the subvector and the remainder from the src its
1311 // inserted into.
1312 SDValue Src = Op.getOperand(0);
1313 SDValue Sub = Op.getOperand(1);
1314 uint64_t Idx = Op.getConstantOperandVal(2);
1315 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1316 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1317 APInt DemandedSrcElts = DemandedElts;
1318 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1319
1320 KnownBits KnownSub, KnownSrc;
1321 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1322 Depth + 1))
1323 return true;
1324 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1325 Depth + 1))
1326 return true;
1327
1328 Known.Zero.setAllBits();
1329 Known.One.setAllBits();
1330 if (!!DemandedSubElts)
1331 Known = Known.intersectWith(KnownSub);
1332 if (!!DemandedSrcElts)
1333 Known = Known.intersectWith(KnownSrc);
1334
1335 // Attempt to avoid multi-use src if we don't need anything from it.
1336 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1337 !DemandedSrcElts.isAllOnes()) {
1339 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1341 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1342 if (NewSub || NewSrc) {
1343 NewSub = NewSub ? NewSub : Sub;
1344 NewSrc = NewSrc ? NewSrc : Src;
1345 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1346 Op.getOperand(2));
1347 return TLO.CombineTo(Op, NewOp);
1348 }
1349 }
1350 break;
1351 }
1353 if (VT.isScalableVector())
1354 return false;
1355 // Offset the demanded elts by the subvector index.
1356 SDValue Src = Op.getOperand(0);
1357 if (Src.getValueType().isScalableVector())
1358 break;
1359 uint64_t Idx = Op.getConstantOperandVal(1);
1360 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1361 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1362
1363 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1364 Depth + 1))
1365 return true;
1366
1367 // Attempt to avoid multi-use src if we don't need anything from it.
1368 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1370 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1371 if (DemandedSrc) {
1372 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1373 Op.getOperand(1));
1374 return TLO.CombineTo(Op, NewOp);
1375 }
1376 }
1377 break;
1378 }
1379 case ISD::CONCAT_VECTORS: {
1380 if (VT.isScalableVector())
1381 return false;
1382 Known.Zero.setAllBits();
1383 Known.One.setAllBits();
1384 EVT SubVT = Op.getOperand(0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1390 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1391 Known2, TLO, Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(0);
1411 SDValue Op1 = Op.getOperand(1);
1412
1413 Known.Zero.setAllBits();
1414 Known.One.setAllBits();
1415 if (!!DemandedLHS) {
1416 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1417 Depth + 1))
1418 return true;
1419 Known = Known.intersectWith(Known2);
1420 }
1421 if (!!DemandedRHS) {
1422 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1423 Depth + 1))
1424 return true;
1425 Known = Known.intersectWith(Known2);
1426 }
1427
1428 // Attempt to avoid multi-use ops if we don't need anything from them.
1430 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1432 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1433 if (DemandedOp0 || DemandedOp1) {
1434 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1435 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1436 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1437 return TLO.CombineTo(Op, NewOp);
1438 }
1439 }
1440 break;
1441 }
1442 case ISD::AND: {
1443 SDValue Op0 = Op.getOperand(0);
1444 SDValue Op1 = Op.getOperand(1);
1445
1446 // If the RHS is a constant, check to see if the LHS would be zero without
1447 // using the bits from the RHS. Below, we use knowledge about the RHS to
1448 // simplify the LHS, here we're using information from the LHS to simplify
1449 // the RHS.
1450 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1451 // Do not increment Depth here; that can cause an infinite loop.
1452 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1453 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1454 if ((LHSKnown.Zero & DemandedBits) ==
1455 (~RHSC->getAPIntValue() & DemandedBits))
1456 return TLO.CombineTo(Op, Op0);
1457
1458 // If any of the set bits in the RHS are known zero on the LHS, shrink
1459 // the constant.
1460 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1461 DemandedElts, TLO))
1462 return true;
1463
1464 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1465 // constant, but if this 'and' is only clearing bits that were just set by
1466 // the xor, then this 'and' can be eliminated by shrinking the mask of
1467 // the xor. For example, for a 32-bit X:
1468 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1469 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1470 LHSKnown.One == ~RHSC->getAPIntValue()) {
1471 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1472 return TLO.CombineTo(Op, Xor);
1473 }
1474 }
1475
1476 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1477 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1478 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1479 (Op0.getOperand(0).isUndef() ||
1481 Op0->hasOneUse()) {
1482 unsigned NumSubElts =
1484 unsigned SubIdx = Op0.getConstantOperandVal(2);
1485 APInt DemandedSub =
1486 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1487 KnownBits KnownSubMask =
1488 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1489 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1490 SDValue NewAnd =
1491 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1492 SDValue NewInsert =
1493 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1494 Op0.getOperand(1), Op0.getOperand(2));
1495 return TLO.CombineTo(Op, NewInsert);
1496 }
1497 }
1498
1499 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1500 Depth + 1))
1501 return true;
1502 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1503 Known2, TLO, Depth + 1))
1504 return true;
1505
1506 // If all of the demanded bits are known one on one side, return the other.
1507 // These bits cannot contribute to the result of the 'and'.
1508 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1509 return TLO.CombineTo(Op, Op0);
1510 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1511 return TLO.CombineTo(Op, Op1);
1512 // If all of the demanded bits in the inputs are known zeros, return zero.
1513 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1514 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1515 // If the RHS is a constant, see if we can simplify it.
1516 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1517 TLO))
1518 return true;
1519 // If the operation can be done in a smaller type, do so.
1521 return true;
1522
1523 // Attempt to avoid multi-use ops if we don't need anything from them.
1524 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1526 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1528 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1529 if (DemandedOp0 || DemandedOp1) {
1530 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1531 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1532 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1533 return TLO.CombineTo(Op, NewOp);
1534 }
1535 }
1536
1537 Known &= Known2;
1538 break;
1539 }
1540 case ISD::OR: {
1541 SDValue Op0 = Op.getOperand(0);
1542 SDValue Op1 = Op.getOperand(1);
1543 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1544 Depth + 1)) {
1545 Op->dropFlags(SDNodeFlags::Disjoint);
1546 return true;
1547 }
1548
1549 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1550 Known2, TLO, Depth + 1)) {
1551 Op->dropFlags(SDNodeFlags::Disjoint);
1552 return true;
1553 }
1554
1555 // If all of the demanded bits are known zero on one side, return the other.
1556 // These bits cannot contribute to the result of the 'or'.
1557 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1558 return TLO.CombineTo(Op, Op0);
1559 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1560 return TLO.CombineTo(Op, Op1);
1561 // If the RHS is a constant, see if we can simplify it.
1562 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1563 return true;
1564 // If the operation can be done in a smaller type, do so.
1566 return true;
1567
1568 // Attempt to avoid multi-use ops if we don't need anything from them.
1569 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1571 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1573 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1574 if (DemandedOp0 || DemandedOp1) {
1575 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1576 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1577 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1578 return TLO.CombineTo(Op, NewOp);
1579 }
1580 }
1581
1582 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1583 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1584 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1585 Op0->hasOneUse() && Op1->hasOneUse()) {
1586 // Attempt to match all commutations - m_c_Or would've been useful!
1587 for (int I = 0; I != 2; ++I) {
1588 SDValue X = Op.getOperand(I).getOperand(0);
1589 SDValue C1 = Op.getOperand(I).getOperand(1);
1590 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1591 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1592 if (Alt.getOpcode() == ISD::OR) {
1593 for (int J = 0; J != 2; ++J) {
1594 if (X == Alt.getOperand(J)) {
1595 SDValue Y = Alt.getOperand(1 - J);
1596 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1597 {C1, C2})) {
1598 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1599 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1600 return TLO.CombineTo(
1601 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1602 }
1603 }
1604 }
1605 }
1606 }
1607 }
1608
1609 Known |= Known2;
1610 break;
1611 }
1612 case ISD::XOR: {
1613 SDValue Op0 = Op.getOperand(0);
1614 SDValue Op1 = Op.getOperand(1);
1615
1616 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1617 Depth + 1))
1618 return true;
1619 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1620 Depth + 1))
1621 return true;
1622
1623 // If all of the demanded bits are known zero on one side, return the other.
1624 // These bits cannot contribute to the result of the 'xor'.
1625 if (DemandedBits.isSubsetOf(Known.Zero))
1626 return TLO.CombineTo(Op, Op0);
1627 if (DemandedBits.isSubsetOf(Known2.Zero))
1628 return TLO.CombineTo(Op, Op1);
1629 // If the operation can be done in a smaller type, do so.
1631 return true;
1632
1633 // If all of the unknown bits are known to be zero on one side or the other
1634 // turn this into an *inclusive* or.
1635 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1636 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1637 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1638
1639 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1640 if (C) {
1641 // If one side is a constant, and all of the set bits in the constant are
1642 // also known set on the other side, turn this into an AND, as we know
1643 // the bits will be cleared.
1644 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1645 // NB: it is okay if more bits are known than are requested
1646 if (C->getAPIntValue() == Known2.One) {
1647 SDValue ANDC =
1648 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1649 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1650 }
1651
1652 // If the RHS is a constant, see if we can change it. Don't alter a -1
1653 // constant because that's a 'not' op, and that is better for combining
1654 // and codegen.
1655 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1656 // We're flipping all demanded bits. Flip the undemanded bits too.
1657 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1658 return TLO.CombineTo(Op, New);
1659 }
1660
1661 unsigned Op0Opcode = Op0.getOpcode();
1662 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1663 if (ConstantSDNode *ShiftC =
1664 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1665 // Don't crash on an oversized shift. We can not guarantee that a
1666 // bogus shift has been simplified to undef.
1667 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1668 uint64_t ShiftAmt = ShiftC->getZExtValue();
1670 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1671 : Ones.lshr(ShiftAmt);
1672 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1674 // If the xor constant is a demanded mask, do a 'not' before the
1675 // shift:
1676 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1677 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1678 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1679 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1680 Op0.getOperand(1)));
1681 }
1682 }
1683 }
1684 }
1685 }
1686
1687 // If we can't turn this into a 'not', try to shrink the constant.
1688 if (!C || !C->isAllOnes())
1689 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1690 return true;
1691
1692 // Attempt to avoid multi-use ops if we don't need anything from them.
1693 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1695 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1697 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1698 if (DemandedOp0 || DemandedOp1) {
1699 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1700 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1701 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1702 return TLO.CombineTo(Op, NewOp);
1703 }
1704 }
1705
1706 Known ^= Known2;
1707 break;
1708 }
1709 case ISD::SELECT:
1710 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1711 Known, TLO, Depth + 1))
1712 return true;
1713 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1714 Known2, TLO, Depth + 1))
1715 return true;
1716
1717 // If the operands are constants, see if we can simplify them.
1718 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1719 return true;
1720
1721 // Only known if known in both the LHS and RHS.
1722 Known = Known.intersectWith(Known2);
1723 break;
1724 case ISD::VSELECT:
1725 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1726 Known, TLO, Depth + 1))
1727 return true;
1728 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1729 Known2, TLO, Depth + 1))
1730 return true;
1731
1732 // Only known if known in both the LHS and RHS.
1733 Known = Known.intersectWith(Known2);
1734 break;
1735 case ISD::SELECT_CC:
1736 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1737 Known, TLO, Depth + 1))
1738 return true;
1739 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1740 Known2, TLO, Depth + 1))
1741 return true;
1742
1743 // If the operands are constants, see if we can simplify them.
1744 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1745 return true;
1746
1747 // Only known if known in both the LHS and RHS.
1748 Known = Known.intersectWith(Known2);
1749 break;
1750 case ISD::SETCC: {
1751 SDValue Op0 = Op.getOperand(0);
1752 SDValue Op1 = Op.getOperand(1);
1753 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1754 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1755 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1756 // -1, we may be able to bypass the setcc.
1757 if (DemandedBits.isSignMask() &&
1761 // If we're testing X < 0, then this compare isn't needed - just use X!
1762 // FIXME: We're limiting to integer types here, but this should also work
1763 // if we don't care about FP signed-zero. The use of SETLT with FP means
1764 // that we don't care about NaNs.
1765 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1767 return TLO.CombineTo(Op, Op0);
1768
1769 // TODO: Should we check for other forms of sign-bit comparisons?
1770 // Examples: X <= -1, X >= 0
1771 }
1772 if (getBooleanContents(Op0.getValueType()) ==
1774 BitWidth > 1)
1775 Known.Zero.setBitsFrom(1);
1776 break;
1777 }
1778 case ISD::SHL: {
1779 SDValue Op0 = Op.getOperand(0);
1780 SDValue Op1 = Op.getOperand(1);
1781 EVT ShiftVT = Op1.getValueType();
1782
1783 if (std::optional<unsigned> KnownSA =
1784 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1785 unsigned ShAmt = *KnownSA;
1786 if (ShAmt == 0)
1787 return TLO.CombineTo(Op, Op0);
1788
1789 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1790 // single shift. We can do this if the bottom bits (which are shifted
1791 // out) are never demanded.
1792 // TODO - support non-uniform vector amounts.
1793 if (Op0.getOpcode() == ISD::SRL) {
1794 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1795 if (std::optional<unsigned> InnerSA =
1796 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1797 unsigned C1 = *InnerSA;
1798 unsigned Opc = ISD::SHL;
1799 int Diff = ShAmt - C1;
1800 if (Diff < 0) {
1801 Diff = -Diff;
1802 Opc = ISD::SRL;
1803 }
1804 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1805 return TLO.CombineTo(
1806 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1807 }
1808 }
1809 }
1810
1811 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1812 // are not demanded. This will likely allow the anyext to be folded away.
1813 // TODO - support non-uniform vector amounts.
1814 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1815 SDValue InnerOp = Op0.getOperand(0);
1816 EVT InnerVT = InnerOp.getValueType();
1817 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1818 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1819 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1820 SDValue NarrowShl = TLO.DAG.getNode(
1821 ISD::SHL, dl, InnerVT, InnerOp,
1822 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1823 return TLO.CombineTo(
1824 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1825 }
1826
1827 // Repeat the SHL optimization above in cases where an extension
1828 // intervenes: (shl (anyext (shr x, c1)), c2) to
1829 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1830 // aren't demanded (as above) and that the shifted upper c1 bits of
1831 // x aren't demanded.
1832 // TODO - support non-uniform vector amounts.
1833 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1834 InnerOp.hasOneUse()) {
1835 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1836 InnerOp, DemandedElts, Depth + 2)) {
1837 unsigned InnerShAmt = *SA2;
1838 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1839 DemandedBits.getActiveBits() <=
1840 (InnerBits - InnerShAmt + ShAmt) &&
1841 DemandedBits.countr_zero() >= ShAmt) {
1842 SDValue NewSA =
1843 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1844 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1845 InnerOp.getOperand(0));
1846 return TLO.CombineTo(
1847 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1848 }
1849 }
1850 }
1851 }
1852
1853 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1854 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1855 Depth + 1)) {
1856 // Disable the nsw and nuw flags. We can no longer guarantee that we
1857 // won't wrap after simplification.
1858 Op->dropFlags(SDNodeFlags::NoWrap);
1859 return true;
1860 }
1861 Known <<= ShAmt;
1862 // low bits known zero.
1863 Known.Zero.setLowBits(ShAmt);
1864
1865 // Attempt to avoid multi-use ops if we don't need anything from them.
1866 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1868 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1869 if (DemandedOp0) {
1870 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1871 return TLO.CombineTo(Op, NewOp);
1872 }
1873 }
1874
1875 // TODO: Can we merge this fold with the one below?
1876 // Try shrinking the operation as long as the shift amount will still be
1877 // in range.
1878 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1879 Op.getNode()->hasOneUse()) {
1880 // Search for the smallest integer type with free casts to and from
1881 // Op's type. For expedience, just check power-of-2 integer types.
1882 unsigned DemandedSize = DemandedBits.getActiveBits();
1883 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1884 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1885 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1886 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1887 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1888 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1889 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1890 assert(DemandedSize <= SmallVTBits &&
1891 "Narrowed below demanded bits?");
1892 // We found a type with free casts.
1893 SDValue NarrowShl = TLO.DAG.getNode(
1894 ISD::SHL, dl, SmallVT,
1895 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1896 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1897 return TLO.CombineTo(
1898 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1899 }
1900 }
1901 }
1902
1903 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1904 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1905 // Only do this if we demand the upper half so the knownbits are correct.
1906 unsigned HalfWidth = BitWidth / 2;
1907 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1908 DemandedBits.countLeadingOnes() >= HalfWidth) {
1909 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1910 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1911 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1912 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1913 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1914 // If we're demanding the upper bits at all, we must ensure
1915 // that the upper bits of the shift result are known to be zero,
1916 // which is equivalent to the narrow shift being NUW.
1917 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1918 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1919 SDNodeFlags Flags;
1920 Flags.setNoSignedWrap(IsNSW);
1921 Flags.setNoUnsignedWrap(IsNUW);
1922 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1923 SDValue NewShiftAmt =
1924 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1925 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1926 NewShiftAmt, Flags);
1927 SDValue NewExt =
1928 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1929 return TLO.CombineTo(Op, NewExt);
1930 }
1931 }
1932 }
1933 } else {
1934 // This is a variable shift, so we can't shift the demand mask by a known
1935 // amount. But if we are not demanding high bits, then we are not
1936 // demanding those bits from the pre-shifted operand either.
1937 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1938 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1939 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1940 Depth + 1)) {
1941 // Disable the nsw and nuw flags. We can no longer guarantee that we
1942 // won't wrap after simplification.
1943 Op->dropFlags(SDNodeFlags::NoWrap);
1944 return true;
1945 }
1946 Known.resetAll();
1947 }
1948 }
1949
1950 // If we are only demanding sign bits then we can use the shift source
1951 // directly.
1952 if (std::optional<unsigned> MaxSA =
1953 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1954 unsigned ShAmt = *MaxSA;
1955 unsigned NumSignBits =
1956 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1957 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1958 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1959 return TLO.CombineTo(Op, Op0);
1960 }
1961 break;
1962 }
1963 case ISD::SRL: {
1964 SDValue Op0 = Op.getOperand(0);
1965 SDValue Op1 = Op.getOperand(1);
1966 EVT ShiftVT = Op1.getValueType();
1967
1968 if (std::optional<unsigned> KnownSA =
1969 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1970 unsigned ShAmt = *KnownSA;
1971 if (ShAmt == 0)
1972 return TLO.CombineTo(Op, Op0);
1973
1974 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1975 // single shift. We can do this if the top bits (which are shifted out)
1976 // are never demanded.
1977 // TODO - support non-uniform vector amounts.
1978 if (Op0.getOpcode() == ISD::SHL) {
1979 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980 if (std::optional<unsigned> InnerSA =
1981 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1982 unsigned C1 = *InnerSA;
1983 unsigned Opc = ISD::SRL;
1984 int Diff = ShAmt - C1;
1985 if (Diff < 0) {
1986 Diff = -Diff;
1987 Opc = ISD::SHL;
1988 }
1989 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1990 return TLO.CombineTo(
1991 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1992 }
1993 }
1994 }
1995
1996 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1997 // single sra. We can do this if the top bits are never demanded.
1998 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1999 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2000 if (std::optional<unsigned> InnerSA =
2001 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2002 unsigned C1 = *InnerSA;
2003 // Clamp the combined shift amount if it exceeds the bit width.
2004 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2005 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2006 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2007 Op0.getOperand(0), NewSA));
2008 }
2009 }
2010 }
2011
2012 APInt InDemandedMask = (DemandedBits << ShAmt);
2013
2014 // If the shift is exact, then it does demand the low bits (and knows that
2015 // they are zero).
2016 if (Op->getFlags().hasExact())
2017 InDemandedMask.setLowBits(ShAmt);
2018
2019 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2020 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2021 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2023 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2024 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2025 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2026 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2027 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2028 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2029 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2030 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2031 SDValue NewShiftAmt =
2032 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2033 SDValue NewShift =
2034 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2035 return TLO.CombineTo(
2036 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2037 }
2038 }
2039
2040 // Compute the new bits that are at the top now.
2041 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2042 Depth + 1))
2043 return true;
2044 Known >>= ShAmt;
2045 // High bits known zero.
2046 Known.Zero.setHighBits(ShAmt);
2047
2048 // Attempt to avoid multi-use ops if we don't need anything from them.
2049 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2051 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2052 if (DemandedOp0) {
2053 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2054 return TLO.CombineTo(Op, NewOp);
2055 }
2056 }
2057 } else {
2058 // Use generic knownbits computation as it has support for non-uniform
2059 // shift amounts.
2060 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2061 }
2062
2063 // If we are only demanding sign bits then we can use the shift source
2064 // directly.
2065 if (std::optional<unsigned> MaxSA =
2066 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2067 unsigned ShAmt = *MaxSA;
2068 // Must already be signbits in DemandedBits bounds, and can't demand any
2069 // shifted in zeroes.
2070 if (DemandedBits.countl_zero() >= ShAmt) {
2071 unsigned NumSignBits =
2072 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2073 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2074 return TLO.CombineTo(Op, Op0);
2075 }
2076 }
2077
2078 // Try to match AVG patterns (after shift simplification).
2079 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2080 DemandedElts, Depth + 1))
2081 return TLO.CombineTo(Op, AVG);
2082
2083 break;
2084 }
2085 case ISD::SRA: {
2086 SDValue Op0 = Op.getOperand(0);
2087 SDValue Op1 = Op.getOperand(1);
2088 EVT ShiftVT = Op1.getValueType();
2089
2090 // If we only want bits that already match the signbit then we don't need
2091 // to shift.
2092 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2093 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2094 NumHiDemandedBits)
2095 return TLO.CombineTo(Op, Op0);
2096
2097 // If this is an arithmetic shift right and only the low-bit is set, we can
2098 // always convert this into a logical shr, even if the shift amount is
2099 // variable. The low bit of the shift cannot be an input sign bit unless
2100 // the shift amount is >= the size of the datatype, which is undefined.
2101 if (DemandedBits.isOne())
2102 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2103
2104 if (std::optional<unsigned> KnownSA =
2105 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2106 unsigned ShAmt = *KnownSA;
2107 if (ShAmt == 0)
2108 return TLO.CombineTo(Op, Op0);
2109
2110 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2111 // supports sext_inreg.
2112 if (Op0.getOpcode() == ISD::SHL) {
2113 if (std::optional<unsigned> InnerSA =
2114 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2115 unsigned LowBits = BitWidth - ShAmt;
2116 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2117 if (VT.isVector())
2118 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2120
2121 if (*InnerSA == ShAmt) {
2122 if (!TLO.LegalOperations() ||
2124 return TLO.CombineTo(
2125 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2126 Op0.getOperand(0),
2127 TLO.DAG.getValueType(ExtVT)));
2128
2129 // Even if we can't convert to sext_inreg, we might be able to
2130 // remove this shift pair if the input is already sign extended.
2131 unsigned NumSignBits =
2132 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2133 if (NumSignBits > ShAmt)
2134 return TLO.CombineTo(Op, Op0.getOperand(0));
2135 }
2136 }
2137 }
2138
2139 APInt InDemandedMask = (DemandedBits << ShAmt);
2140
2141 // If the shift is exact, then it does demand the low bits (and knows that
2142 // they are zero).
2143 if (Op->getFlags().hasExact())
2144 InDemandedMask.setLowBits(ShAmt);
2145
2146 // If any of the demanded bits are produced by the sign extension, we also
2147 // demand the input sign bit.
2148 if (DemandedBits.countl_zero() < ShAmt)
2149 InDemandedMask.setSignBit();
2150
2151 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2152 Depth + 1))
2153 return true;
2154 Known >>= ShAmt;
2155
2156 // If the input sign bit is known to be zero, or if none of the top bits
2157 // are demanded, turn this into an unsigned shift right.
2158 if (Known.Zero[BitWidth - ShAmt - 1] ||
2159 DemandedBits.countl_zero() >= ShAmt) {
2160 SDNodeFlags Flags;
2161 Flags.setExact(Op->getFlags().hasExact());
2162 return TLO.CombineTo(
2163 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2164 }
2165
2166 int Log2 = DemandedBits.exactLogBase2();
2167 if (Log2 >= 0) {
2168 // The bit must come from the sign.
2169 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2170 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2171 }
2172
2173 if (Known.One[BitWidth - ShAmt - 1])
2174 // New bits are known one.
2175 Known.One.setHighBits(ShAmt);
2176
2177 // Attempt to avoid multi-use ops if we don't need anything from them.
2178 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2180 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2181 if (DemandedOp0) {
2182 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2183 return TLO.CombineTo(Op, NewOp);
2184 }
2185 }
2186 }
2187
2188 // Try to match AVG patterns (after shift simplification).
2189 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2190 DemandedElts, Depth + 1))
2191 return TLO.CombineTo(Op, AVG);
2192
2193 break;
2194 }
2195 case ISD::FSHL:
2196 case ISD::FSHR: {
2197 SDValue Op0 = Op.getOperand(0);
2198 SDValue Op1 = Op.getOperand(1);
2199 SDValue Op2 = Op.getOperand(2);
2200 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2201
2202 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2203 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2204
2205 // For fshl, 0-shift returns the 1st arg.
2206 // For fshr, 0-shift returns the 2nd arg.
2207 if (Amt == 0) {
2208 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2209 Known, TLO, Depth + 1))
2210 return true;
2211 break;
2212 }
2213
2214 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2215 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2216 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2217 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2218 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2219 Depth + 1))
2220 return true;
2221 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2222 Depth + 1))
2223 return true;
2224
2225 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2226 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2227 Known = Known.unionWith(Known2);
2228
2229 // Attempt to avoid multi-use ops if we don't need anything from them.
2230 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2231 !DemandedElts.isAllOnes()) {
2233 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2235 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2236 if (DemandedOp0 || DemandedOp1) {
2237 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2238 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2239 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2240 DemandedOp1, Op2);
2241 return TLO.CombineTo(Op, NewOp);
2242 }
2243 }
2244 }
2245
2246 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2247 if (isPowerOf2_32(BitWidth)) {
2248 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2249 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2250 Known2, TLO, Depth + 1))
2251 return true;
2252 }
2253 break;
2254 }
2255 case ISD::ROTL:
2256 case ISD::ROTR: {
2257 SDValue Op0 = Op.getOperand(0);
2258 SDValue Op1 = Op.getOperand(1);
2259 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2260
2261 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2262 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2263 return TLO.CombineTo(Op, Op0);
2264
2265 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2266 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2267 unsigned RevAmt = BitWidth - Amt;
2268
2269 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2270 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2271 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2272 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2273 Depth + 1))
2274 return true;
2275
2276 // rot*(x, 0) --> x
2277 if (Amt == 0)
2278 return TLO.CombineTo(Op, Op0);
2279
2280 // See if we don't demand either half of the rotated bits.
2281 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2282 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2283 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2284 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2285 }
2286 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2287 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2288 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2289 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2290 }
2291 }
2292
2293 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2294 if (isPowerOf2_32(BitWidth)) {
2295 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2296 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2297 Depth + 1))
2298 return true;
2299 }
2300 break;
2301 }
2302 case ISD::SMIN:
2303 case ISD::SMAX:
2304 case ISD::UMIN:
2305 case ISD::UMAX: {
2306 unsigned Opc = Op.getOpcode();
2307 SDValue Op0 = Op.getOperand(0);
2308 SDValue Op1 = Op.getOperand(1);
2309
2310 // If we're only demanding signbits, then we can simplify to OR/AND node.
2311 unsigned BitOp =
2312 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2313 unsigned NumSignBits =
2314 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2315 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2316 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2317 if (NumSignBits >= NumDemandedUpperBits)
2318 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2319
2320 // Check if one arg is always less/greater than (or equal) to the other arg.
2321 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2322 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2323 switch (Opc) {
2324 case ISD::SMIN:
2325 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2326 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2327 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2328 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2329 Known = KnownBits::smin(Known0, Known1);
2330 break;
2331 case ISD::SMAX:
2332 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2333 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2334 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2335 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2336 Known = KnownBits::smax(Known0, Known1);
2337 break;
2338 case ISD::UMIN:
2339 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2340 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2341 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2342 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2343 Known = KnownBits::umin(Known0, Known1);
2344 break;
2345 case ISD::UMAX:
2346 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2347 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2348 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2349 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2350 Known = KnownBits::umax(Known0, Known1);
2351 break;
2352 }
2353 break;
2354 }
2355 case ISD::BITREVERSE: {
2356 SDValue Src = Op.getOperand(0);
2357 APInt DemandedSrcBits = DemandedBits.reverseBits();
2358 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2359 Depth + 1))
2360 return true;
2361 Known = Known2.reverseBits();
2362 break;
2363 }
2364 case ISD::BSWAP: {
2365 SDValue Src = Op.getOperand(0);
2366
2367 // If the only bits demanded come from one byte of the bswap result,
2368 // just shift the input byte into position to eliminate the bswap.
2369 unsigned NLZ = DemandedBits.countl_zero();
2370 unsigned NTZ = DemandedBits.countr_zero();
2371
2372 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2373 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2374 // have 14 leading zeros, round to 8.
2375 NLZ = alignDown(NLZ, 8);
2376 NTZ = alignDown(NTZ, 8);
2377 // If we need exactly one byte, we can do this transformation.
2378 if (BitWidth - NLZ - NTZ == 8) {
2379 // Replace this with either a left or right shift to get the byte into
2380 // the right place.
2381 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2382 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2383 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2384 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2385 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2386 return TLO.CombineTo(Op, NewOp);
2387 }
2388 }
2389
2390 APInt DemandedSrcBits = DemandedBits.byteSwap();
2391 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2392 Depth + 1))
2393 return true;
2394 Known = Known2.byteSwap();
2395 break;
2396 }
2397 case ISD::CTPOP: {
2398 // If only 1 bit is demanded, replace with PARITY as long as we're before
2399 // op legalization.
2400 // FIXME: Limit to scalars for now.
2401 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2402 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2403 Op.getOperand(0)));
2404
2405 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2406 break;
2407 }
2409 SDValue Op0 = Op.getOperand(0);
2410 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2411 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2412
2413 // If we only care about the highest bit, don't bother shifting right.
2414 if (DemandedBits.isSignMask()) {
2415 unsigned MinSignedBits =
2416 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2417 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2418 // However if the input is already sign extended we expect the sign
2419 // extension to be dropped altogether later and do not simplify.
2420 if (!AlreadySignExtended) {
2421 // Compute the correct shift amount type, which must be getShiftAmountTy
2422 // for scalar types after legalization.
2423 SDValue ShiftAmt =
2424 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2425 return TLO.CombineTo(Op,
2426 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2427 }
2428 }
2429
2430 // If none of the extended bits are demanded, eliminate the sextinreg.
2431 if (DemandedBits.getActiveBits() <= ExVTBits)
2432 return TLO.CombineTo(Op, Op0);
2433
2434 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2435
2436 // Since the sign extended bits are demanded, we know that the sign
2437 // bit is demanded.
2438 InputDemandedBits.setBit(ExVTBits - 1);
2439
2440 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2441 Depth + 1))
2442 return true;
2443
2444 // If the sign bit of the input is known set or clear, then we know the
2445 // top bits of the result.
2446
2447 // If the input sign bit is known zero, convert this into a zero extension.
2448 if (Known.Zero[ExVTBits - 1])
2449 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2450
2451 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2452 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2453 Known.One.setBitsFrom(ExVTBits);
2454 Known.Zero &= Mask;
2455 } else { // Input sign bit unknown
2456 Known.Zero &= Mask;
2457 Known.One &= Mask;
2458 }
2459 break;
2460 }
2461 case ISD::BUILD_PAIR: {
2462 EVT HalfVT = Op.getOperand(0).getValueType();
2463 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2464
2465 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2466 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2467
2468 KnownBits KnownLo, KnownHi;
2469
2470 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2471 return true;
2472
2473 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2474 return true;
2475
2476 Known = KnownHi.concat(KnownLo);
2477 break;
2478 }
2480 if (VT.isScalableVector())
2481 return false;
2482 [[fallthrough]];
2483 case ISD::ZERO_EXTEND: {
2484 SDValue Src = Op.getOperand(0);
2485 EVT SrcVT = Src.getValueType();
2486 unsigned InBits = SrcVT.getScalarSizeInBits();
2487 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2488 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2489
2490 // If none of the top bits are demanded, convert this into an any_extend.
2491 if (DemandedBits.getActiveBits() <= InBits) {
2492 // If we only need the non-extended bits of the bottom element
2493 // then we can just bitcast to the result.
2494 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2495 VT.getSizeInBits() == SrcVT.getSizeInBits())
2496 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2497
2498 unsigned Opc =
2500 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2501 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2502 }
2503
2504 APInt InDemandedBits = DemandedBits.trunc(InBits);
2505 APInt InDemandedElts = DemandedElts.zext(InElts);
2506 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2507 Depth + 1)) {
2508 Op->dropFlags(SDNodeFlags::NonNeg);
2509 return true;
2510 }
2511 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2512 Known = Known.zext(BitWidth);
2513
2514 // Attempt to avoid multi-use ops if we don't need anything from them.
2516 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2517 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2518 break;
2519 }
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::SIGN_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2530
2531 APInt InDemandedElts = DemandedElts.zext(InElts);
2532 APInt InDemandedBits = DemandedBits.trunc(InBits);
2533
2534 // Since some of the sign extended bits are demanded, we know that the sign
2535 // bit is demanded.
2536 InDemandedBits.setBit(InBits - 1);
2537
2538 // If none of the top bits are demanded, convert this into an any_extend.
2539 if (DemandedBits.getActiveBits() <= InBits) {
2540 // If we only need the non-extended bits of the bottom element
2541 // then we can just bitcast to the result.
2542 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2543 VT.getSizeInBits() == SrcVT.getSizeInBits())
2544 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2545
2546 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2548 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2549 InBits) {
2550 unsigned Opc =
2552 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2553 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2554 }
2555 }
2556
2557 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2558 Depth + 1))
2559 return true;
2560 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2561
2562 // If the sign bit is known one, the top bits match.
2563 Known = Known.sext(BitWidth);
2564
2565 // If the sign bit is known zero, convert this to a zero extend.
2566 if (Known.isNonNegative()) {
2567 unsigned Opc =
2569 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2570 SDNodeFlags Flags;
2571 if (!IsVecInReg)
2572 Flags |= SDNodeFlags::NonNeg;
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2574 }
2575 }
2576
2577 // Attempt to avoid multi-use ops if we don't need anything from them.
2579 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2580 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2581 break;
2582 }
2584 if (VT.isScalableVector())
2585 return false;
2586 [[fallthrough]];
2587 case ISD::ANY_EXTEND: {
2588 SDValue Src = Op.getOperand(0);
2589 EVT SrcVT = Src.getValueType();
2590 unsigned InBits = SrcVT.getScalarSizeInBits();
2591 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2592 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2593
2594 // If we only need the bottom element then we can just bitcast.
2595 // TODO: Handle ANY_EXTEND?
2596 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2597 VT.getSizeInBits() == SrcVT.getSizeInBits())
2598 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2599
2600 APInt InDemandedBits = DemandedBits.trunc(InBits);
2601 APInt InDemandedElts = DemandedElts.zext(InElts);
2602 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2603 Depth + 1))
2604 return true;
2605 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2606 Known = Known.anyext(BitWidth);
2607
2608 // Attempt to avoid multi-use ops if we don't need anything from them.
2610 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2611 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2612 break;
2613 }
2614 case ISD::TRUNCATE: {
2615 SDValue Src = Op.getOperand(0);
2616
2617 // Simplify the input, using demanded bit information, and compute the known
2618 // zero/one bits live out.
2619 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2620 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2621 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2622 Depth + 1)) {
2623 // Disable the nsw and nuw flags. We can no longer guarantee that we
2624 // won't wrap after simplification.
2625 Op->dropFlags(SDNodeFlags::NoWrap);
2626 return true;
2627 }
2628 Known = Known.trunc(BitWidth);
2629
2630 // Attempt to avoid multi-use ops if we don't need anything from them.
2632 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2633 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2634
2635 // If the input is only used by this truncate, see if we can shrink it based
2636 // on the known demanded bits.
2637 switch (Src.getOpcode()) {
2638 default:
2639 break;
2640 case ISD::SRL:
2641 // Shrink SRL by a constant if none of the high bits shifted in are
2642 // demanded.
2643 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2644 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2645 // undesirable.
2646 break;
2647
2648 if (Src.getNode()->hasOneUse()) {
2649 if (isTruncateFree(Src, VT) &&
2650 !isTruncateFree(Src.getValueType(), VT)) {
2651 // If truncate is only free at trunc(srl), do not turn it into
2652 // srl(trunc). The check is done by first check the truncate is free
2653 // at Src's opcode(srl), then check the truncate is not done by
2654 // referencing sub-register. In test, if both trunc(srl) and
2655 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2656 // trunc(srl)'s trunc is free, trunc(srl) is better.
2657 break;
2658 }
2659
2660 std::optional<unsigned> ShAmtC =
2661 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2662 if (!ShAmtC || *ShAmtC >= BitWidth)
2663 break;
2664 unsigned ShVal = *ShAmtC;
2665
2666 APInt HighBits =
2667 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2668 HighBits.lshrInPlace(ShVal);
2669 HighBits = HighBits.trunc(BitWidth);
2670 if (!(HighBits & DemandedBits)) {
2671 // None of the shifted in bits are needed. Add a truncate of the
2672 // shift input, then shift it.
2673 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2674 SDValue NewTrunc =
2675 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2676 return TLO.CombineTo(
2677 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2678 }
2679 }
2680 break;
2681 }
2682
2683 break;
2684 }
2685 case ISD::AssertZext: {
2686 // AssertZext demands all of the high bits, plus any of the low bits
2687 // demanded by its users.
2688 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2690 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2691 TLO, Depth + 1))
2692 return true;
2693
2694 Known.Zero |= ~InMask;
2695 Known.One &= (~Known.Zero);
2696 break;
2697 }
2699 SDValue Src = Op.getOperand(0);
2700 SDValue Idx = Op.getOperand(1);
2701 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2702 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2703
2704 if (SrcEltCnt.isScalable())
2705 return false;
2706
2707 // Demand the bits from every vector element without a constant index.
2708 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2709 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2710 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2711 if (CIdx->getAPIntValue().ult(NumSrcElts))
2712 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2713
2714 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2715 // anything about the extended bits.
2716 APInt DemandedSrcBits = DemandedBits;
2717 if (BitWidth > EltBitWidth)
2718 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2719
2720 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2721 Depth + 1))
2722 return true;
2723
2724 // Attempt to avoid multi-use ops if we don't need anything from them.
2725 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2726 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2727 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2728 SDValue NewOp =
2729 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2730 return TLO.CombineTo(Op, NewOp);
2731 }
2732 }
2733
2734 Known = Known2;
2735 if (BitWidth > EltBitWidth)
2736 Known = Known.anyext(BitWidth);
2737 break;
2738 }
2739 case ISD::BITCAST: {
2740 if (VT.isScalableVector())
2741 return false;
2742 SDValue Src = Op.getOperand(0);
2743 EVT SrcVT = Src.getValueType();
2744 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2745
2746 // If this is an FP->Int bitcast and if the sign bit is the only
2747 // thing demanded, turn this into a FGETSIGN.
2748 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2749 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2750 SrcVT.isFloatingPoint()) {
2751 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2752 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2753 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2754 SrcVT != MVT::f128) {
2755 // Cannot eliminate/lower SHL for f128 yet.
2756 EVT Ty = OpVTLegal ? VT : MVT::i32;
2757 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2758 // place. We expect the SHL to be eliminated by other optimizations.
2759 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2760 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2761 if (!OpVTLegal && OpVTSizeInBits > 32)
2762 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2763 unsigned ShVal = Op.getValueSizeInBits() - 1;
2764 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2765 return TLO.CombineTo(Op,
2766 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2767 }
2768 }
2769
2770 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2771 // Demand the elt/bit if any of the original elts/bits are demanded.
2772 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2773 unsigned Scale = BitWidth / NumSrcEltBits;
2774 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2775 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2776 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2777 for (unsigned i = 0; i != Scale; ++i) {
2778 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2779 unsigned BitOffset = EltOffset * NumSrcEltBits;
2780 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2781 if (!Sub.isZero()) {
2782 DemandedSrcBits |= Sub;
2783 for (unsigned j = 0; j != NumElts; ++j)
2784 if (DemandedElts[j])
2785 DemandedSrcElts.setBit((j * Scale) + i);
2786 }
2787 }
2788
2789 APInt KnownSrcUndef, KnownSrcZero;
2790 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2791 KnownSrcZero, TLO, Depth + 1))
2792 return true;
2793
2794 KnownBits KnownSrcBits;
2795 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2796 KnownSrcBits, TLO, Depth + 1))
2797 return true;
2798 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2799 // TODO - bigendian once we have test coverage.
2800 unsigned Scale = NumSrcEltBits / BitWidth;
2801 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2802 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2803 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2804 for (unsigned i = 0; i != NumElts; ++i)
2805 if (DemandedElts[i]) {
2806 unsigned Offset = (i % Scale) * BitWidth;
2807 DemandedSrcBits.insertBits(DemandedBits, Offset);
2808 DemandedSrcElts.setBit(i / Scale);
2809 }
2810
2811 if (SrcVT.isVector()) {
2812 APInt KnownSrcUndef, KnownSrcZero;
2813 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2814 KnownSrcZero, TLO, Depth + 1))
2815 return true;
2816 }
2817
2818 KnownBits KnownSrcBits;
2819 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2820 KnownSrcBits, TLO, Depth + 1))
2821 return true;
2822
2823 // Attempt to avoid multi-use ops if we don't need anything from them.
2824 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2825 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2826 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2827 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2828 return TLO.CombineTo(Op, NewOp);
2829 }
2830 }
2831 }
2832
2833 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2834 // recursive call where Known may be useful to the caller.
2835 if (Depth > 0) {
2836 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2837 return false;
2838 }
2839 break;
2840 }
2841 case ISD::MUL:
2842 if (DemandedBits.isPowerOf2()) {
2843 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2844 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2845 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2846 unsigned CTZ = DemandedBits.countr_zero();
2847 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2848 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2849 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2850 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2851 return TLO.CombineTo(Op, Shl);
2852 }
2853 }
2854 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2855 // X * X is odd iff X is odd.
2856 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2857 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2858 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2859 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2860 return TLO.CombineTo(Op, And1);
2861 }
2862 [[fallthrough]];
2863 case ISD::ADD:
2864 case ISD::SUB: {
2865 // Add, Sub, and Mul don't demand any bits in positions beyond that
2866 // of the highest bit demanded of them.
2867 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2868 SDNodeFlags Flags = Op.getNode()->getFlags();
2869 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2870 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2871 KnownBits KnownOp0, KnownOp1;
2872 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2873 const KnownBits &KnownRHS) {
2874 if (Op.getOpcode() == ISD::MUL)
2875 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2876 return Demanded;
2877 };
2878 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2879 Depth + 1) ||
2880 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2881 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2882 // See if the operation should be performed at a smaller bit width.
2884 // Disable the nsw and nuw flags. We can no longer guarantee that we
2885 // won't wrap after simplification.
2886 Op->dropFlags(SDNodeFlags::NoWrap);
2887 return true;
2888 }
2889
2890 // neg x with only low bit demanded is simply x.
2891 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2892 isNullConstant(Op0))
2893 return TLO.CombineTo(Op, Op1);
2894
2895 // Attempt to avoid multi-use ops if we don't need anything from them.
2896 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2898 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2900 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2901 if (DemandedOp0 || DemandedOp1) {
2902 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2903 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2904 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2905 Flags & ~SDNodeFlags::NoWrap);
2906 return TLO.CombineTo(Op, NewOp);
2907 }
2908 }
2909
2910 // If we have a constant operand, we may be able to turn it into -1 if we
2911 // do not demand the high bits. This can make the constant smaller to
2912 // encode, allow more general folding, or match specialized instruction
2913 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2914 // is probably not useful (and could be detrimental).
2916 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2917 if (C && !C->isAllOnes() && !C->isOne() &&
2918 (C->getAPIntValue() | HighMask).isAllOnes()) {
2919 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2920 // Disable the nsw and nuw flags. We can no longer guarantee that we
2921 // won't wrap after simplification.
2922 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2923 Flags & ~SDNodeFlags::NoWrap);
2924 return TLO.CombineTo(Op, NewOp);
2925 }
2926
2927 // Match a multiply with a disguised negated-power-of-2 and convert to a
2928 // an equivalent shift-left amount.
2929 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2930 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2931 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2932 return 0;
2933
2934 // Don't touch opaque constants. Also, ignore zero and power-of-2
2935 // multiplies. Those will get folded later.
2936 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2937 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2938 !MulC->getAPIntValue().isPowerOf2()) {
2939 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2940 if (UnmaskedC.isNegatedPowerOf2())
2941 return (-UnmaskedC).logBase2();
2942 }
2943 return 0;
2944 };
2945
2946 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2947 unsigned ShlAmt) {
2948 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2949 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2950 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2951 return TLO.CombineTo(Op, Res);
2952 };
2953
2955 if (Op.getOpcode() == ISD::ADD) {
2956 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2957 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2958 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2959 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2960 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2961 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2962 }
2963 if (Op.getOpcode() == ISD::SUB) {
2964 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2965 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2966 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2967 }
2968 }
2969
2970 if (Op.getOpcode() == ISD::MUL) {
2971 Known = KnownBits::mul(KnownOp0, KnownOp1);
2972 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2974 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2975 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2976 }
2977 break;
2978 }
2979 case ISD::FABS: {
2980 SDValue Op0 = Op.getOperand(0);
2981 APInt SignMask = APInt::getSignMask(BitWidth);
2982
2983 if (!DemandedBits.intersects(SignMask))
2984 return TLO.CombineTo(Op, Op0);
2985
2986 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2987 Depth + 1))
2988 return true;
2989
2990 if (Known.isNonNegative())
2991 return TLO.CombineTo(Op, Op0);
2992 if (Known.isNegative())
2993 return TLO.CombineTo(
2994 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
2995
2996 Known.Zero |= SignMask;
2997 Known.One &= ~SignMask;
2998
2999 break;
3000 }
3001 case ISD::FCOPYSIGN: {
3002 SDValue Op0 = Op.getOperand(0);
3003 SDValue Op1 = Op.getOperand(1);
3004
3005 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3006 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3007 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3008 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3009
3010 if (!DemandedBits.intersects(SignMask0))
3011 return TLO.CombineTo(Op, Op0);
3012
3013 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3014 Known, TLO, Depth + 1) ||
3015 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3016 Depth + 1))
3017 return true;
3018
3019 if (Known2.isNonNegative())
3020 return TLO.CombineTo(
3021 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3022
3023 if (Known2.isNegative())
3024 return TLO.CombineTo(
3025 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3026 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3027
3028 Known.Zero &= ~SignMask0;
3029 Known.One &= ~SignMask0;
3030 break;
3031 }
3032 case ISD::FNEG: {
3033 SDValue Op0 = Op.getOperand(0);
3034 APInt SignMask = APInt::getSignMask(BitWidth);
3035
3036 if (!DemandedBits.intersects(SignMask))
3037 return TLO.CombineTo(Op, Op0);
3038
3039 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3040 Depth + 1))
3041 return true;
3042
3043 if (!Known.isSignUnknown()) {
3044 Known.Zero ^= SignMask;
3045 Known.One ^= SignMask;
3046 }
3047
3048 break;
3049 }
3050 default:
3051 // We also ask the target about intrinsics (which could be specific to it).
3052 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3053 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3054 // TODO: Probably okay to remove after audit; here to reduce change size
3055 // in initial enablement patch for scalable vectors
3056 if (Op.getValueType().isScalableVector())
3057 break;
3059 Known, TLO, Depth))
3060 return true;
3061 break;
3062 }
3063
3064 // Just use computeKnownBits to compute output bits.
3065 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3066 break;
3067 }
3068
3069 // If we know the value of all of the demanded bits, return this as a
3070 // constant.
3072 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3073 // Avoid folding to a constant if any OpaqueConstant is involved.
3074 if (llvm::any_of(Op->ops(), [](SDValue V) {
3075 auto *C = dyn_cast<ConstantSDNode>(V);
3076 return C && C->isOpaque();
3077 }))
3078 return false;
3079 if (VT.isInteger())
3080 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3081 if (VT.isFloatingPoint())
3082 return TLO.CombineTo(
3083 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3084 dl, VT));
3085 }
3086
3087 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3088 // Try again just for the original demanded elts.
3089 // Ensure we do this AFTER constant folding above.
3090 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3091 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3092
3093 return false;
3094}
3095
// DAGCombiner-facing wrapper around the main SimplifyDemandedVectorElts
// implementation: builds a TargetLoweringOpt from the combiner's current
// legalization state, runs the element-level simplification, and on success
// commits the recorded replacement back to the combiner.
// NOTE(review): the first line of this signature (original line 3096, taking
// the SDValue operand) was lost in extraction — confirm against upstream.
 3097 const APInt &DemandedElts,
 3098 DAGCombinerInfo &DCI) const {
 3099 SelectionDAG &DAG = DCI.DAG;
 // The two flags record whether type/operation legalization has already run,
 // which constrains what nodes the optimization is allowed to create.
 3100 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 3101 !DCI.isBeforeLegalizeOps());
 3102
 // KnownUndef/KnownZero are computed by the callee but not consumed here;
 // this overload only cares whether a simplification happened.
 3103 APInt KnownUndef, KnownZero;
 3104 bool Simplified =
 3105 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
 3106 if (Simplified) {
 // Re-queue the node for another combine pass and apply the CombineTo
 // replacement(s) accumulated in TLO to the DAG.
 3107 DCI.AddToWorklist(Op.getNode());
 3108 DCI.CommitTargetLoweringOpt(TLO);
 3109 }
 3110
 3111 return Simplified;
 3112}
3113
 3114/// Given a vector binary operation and known undefined elements for each input
 3115/// operand, compute whether each element of the output is undefined.
// NOTE(review): the signature's first line (original line 3116) was lost in
// extraction; visible parameters are the per-operand undef masks. Confirm the
// leading parameters (the binop SDValue and the SelectionDAG) upstream.
 3117 const APInt &UndefOp0,
 3118 const APInt &UndefOp1) {
 3119 EVT VT = BO.getValueType();
 3121 "Vector binop only");
 3122
 3123 EVT EltVT = VT.getVectorElementType();
 // Non-fixed-length (scalable) vectors are analyzed as a single lane. The
 // undef masks must be sized to exactly this lane count.
 3124 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
 3125 assert(UndefOp0.getBitWidth() == NumElts &&
 3126 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis")
 3127
 // Return lane Index of V as a node we can safely fold: an explicit UNDEF if
 // the mask says the lane is undef, or the BUILD_VECTOR element when it is an
 // FP constant, undef, or a non-opaque integer constant. Null SDValue means
 // the lane cannot be reasoned about.
 3128 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
 3129 const APInt &UndefVals) {
 3130 if (UndefVals[Index])
 3131 return DAG.getUNDEF(EltVT);
 3132
 3133 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
 3134 // Try hard to make sure that the getNode() call is not creating temporary
 3135 // nodes. Ignore opaque integers because they do not constant fold.
 3136 SDValue Elt = BV->getOperand(Index);
 3137 auto *C = dyn_cast<ConstantSDNode>(Elt);
 3138 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
 3139 return Elt;
 3140 }
 3141
 3142 return SDValue();
 3143 };
 3144
 3145 APInt KnownUndef = APInt::getZero(NumElts);
 3146 for (unsigned i = 0; i != NumElts; ++i) {
 3147 // If both inputs for this element are either constant or undef and match
 3148 // the element type, compute the constant/undef result for this element of
 3149 // the vector.
 3150 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
 3151 // not handle FP constants. The code within getNode() should be refactored
 3152 // to avoid the danger of creating a bogus temporary node here.
 3153 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
 3154 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
 // getNode() constant-folds the scalar op; an UNDEF result proves this
 // output lane is undef.
 3155 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
 3156 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
 3157 KnownUndef.setBit(i);
 3158 }
 3159 return KnownUndef;
 3160}
3161
3163 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3164 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3165 bool AssumeSingleUse) const {
3166 EVT VT = Op.getValueType();
3167 unsigned Opcode = Op.getOpcode();
3168 APInt DemandedElts = OriginalDemandedElts;
3169 unsigned NumElts = DemandedElts.getBitWidth();
3170 assert(VT.isVector() && "Expected vector op");
3171
3172 KnownUndef = KnownZero = APInt::getZero(NumElts);
3173
3175 return false;
3176
3177 // TODO: For now we assume we know nothing about scalable vectors.
3178 if (VT.isScalableVector())
3179 return false;
3180
3181 assert(VT.getVectorNumElements() == NumElts &&
3182 "Mask size mismatches value type element count!");
3183
3184 // Undef operand.
3185 if (Op.isUndef()) {
3186 KnownUndef.setAllBits();
3187 return false;
3188 }
3189
3190 // If Op has other users, assume that all elements are needed.
3191 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3192 DemandedElts.setAllBits();
3193
3194 // Not demanding any elements from Op.
3195 if (DemandedElts == 0) {
3196 KnownUndef.setAllBits();
3197 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3198 }
3199
3200 // Limit search depth.
3202 return false;
3203
3204 SDLoc DL(Op);
3205 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3206 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3207
3208 // Helper for demanding the specified elements and all the bits of both binary
3209 // operands.
3210 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3211 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3212 TLO.DAG, Depth + 1);
3213 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3214 TLO.DAG, Depth + 1);
3215 if (NewOp0 || NewOp1) {
3216 SDValue NewOp =
3217 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3218 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3219 return TLO.CombineTo(Op, NewOp);
3220 }
3221 return false;
3222 };
3223
3224 switch (Opcode) {
3225 case ISD::SCALAR_TO_VECTOR: {
3226 if (!DemandedElts[0]) {
3227 KnownUndef.setAllBits();
3228 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3229 }
3230 KnownUndef.setHighBits(NumElts - 1);
3231 break;
3232 }
3233 case ISD::BITCAST: {
3234 SDValue Src = Op.getOperand(0);
3235 EVT SrcVT = Src.getValueType();
3236
3237 if (!SrcVT.isVector()) {
3238 // TODO - bigendian once we have test coverage.
3239 if (IsLE) {
3240 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3241 unsigned EltSize = VT.getScalarSizeInBits();
3242 for (unsigned I = 0; I != NumElts; ++I) {
3243 if (DemandedElts[I]) {
3244 unsigned Offset = I * EltSize;
3245 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3246 }
3247 }
3248 KnownBits Known;
3249 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3250 return true;
3251 }
3252 break;
3253 }
3254
3255 // Fast handling of 'identity' bitcasts.
3256 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3257 if (NumSrcElts == NumElts)
3258 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3259 KnownZero, TLO, Depth + 1);
3260
3261 APInt SrcDemandedElts, SrcZero, SrcUndef;
3262
3263 // Bitcast from 'large element' src vector to 'small element' vector, we
3264 // must demand a source element if any DemandedElt maps to it.
3265 if ((NumElts % NumSrcElts) == 0) {
3266 unsigned Scale = NumElts / NumSrcElts;
3267 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3268 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3269 TLO, Depth + 1))
3270 return true;
3271
3272 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3273 // of the large element.
3274 // TODO - bigendian once we have test coverage.
3275 if (IsLE) {
3276 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3277 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3278 for (unsigned i = 0; i != NumElts; ++i)
3279 if (DemandedElts[i]) {
3280 unsigned Ofs = (i % Scale) * EltSizeInBits;
3281 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3282 }
3283
3284 KnownBits Known;
3285 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3286 TLO, Depth + 1))
3287 return true;
3288
3289 // The bitcast has split each wide element into a number of
3290 // narrow subelements. We have just computed the Known bits
3291 // for wide elements. See if element splitting results in
3292 // some subelements being zero. Only for demanded elements!
3293 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3294 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3295 .isAllOnes())
3296 continue;
3297 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3298 unsigned Elt = Scale * SrcElt + SubElt;
3299 if (DemandedElts[Elt])
3300 KnownZero.setBit(Elt);
3301 }
3302 }
3303 }
3304
3305 // If the src element is zero/undef then all the output elements will be -
3306 // only demanded elements are guaranteed to be correct.
3307 for (unsigned i = 0; i != NumSrcElts; ++i) {
3308 if (SrcDemandedElts[i]) {
3309 if (SrcZero[i])
3310 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3311 if (SrcUndef[i])
3312 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3313 }
3314 }
3315 }
3316
3317 // Bitcast from 'small element' src vector to 'large element' vector, we
3318 // demand all smaller source elements covered by the larger demanded element
3319 // of this vector.
3320 if ((NumSrcElts % NumElts) == 0) {
3321 unsigned Scale = NumSrcElts / NumElts;
3322 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3323 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3324 TLO, Depth + 1))
3325 return true;
3326
3327 // If all the src elements covering an output element are zero/undef, then
3328 // the output element will be as well, assuming it was demanded.
3329 for (unsigned i = 0; i != NumElts; ++i) {
3330 if (DemandedElts[i]) {
3331 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3332 KnownZero.setBit(i);
3333 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3334 KnownUndef.setBit(i);
3335 }
3336 }
3337 }
3338 break;
3339 }
3340 case ISD::FREEZE: {
3341 SDValue N0 = Op.getOperand(0);
3342 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3343 /*PoisonOnly=*/false,
3344 Depth + 1))
3345 return TLO.CombineTo(Op, N0);
3346
3347 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3348 // freeze(op(x, ...)) -> op(freeze(x), ...).
3349 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3350 return TLO.CombineTo(
3352 TLO.DAG.getFreeze(N0.getOperand(0))));
3353 break;
3354 }
3355 case ISD::BUILD_VECTOR: {
3356 // Check all elements and simplify any unused elements with UNDEF.
3357 if (!DemandedElts.isAllOnes()) {
3358 // Don't simplify BROADCASTS.
3359 if (llvm::any_of(Op->op_values(),
3360 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3362 bool Updated = false;
3363 for (unsigned i = 0; i != NumElts; ++i) {
3364 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3365 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3366 KnownUndef.setBit(i);
3367 Updated = true;
3368 }
3369 }
3370 if (Updated)
3371 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3372 }
3373 }
3374 for (unsigned i = 0; i != NumElts; ++i) {
3375 SDValue SrcOp = Op.getOperand(i);
3376 if (SrcOp.isUndef()) {
3377 KnownUndef.setBit(i);
3378 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3380 KnownZero.setBit(i);
3381 }
3382 }
3383 break;
3384 }
3385 case ISD::CONCAT_VECTORS: {
3386 EVT SubVT = Op.getOperand(0).getValueType();
3387 unsigned NumSubVecs = Op.getNumOperands();
3388 unsigned NumSubElts = SubVT.getVectorNumElements();
3389 for (unsigned i = 0; i != NumSubVecs; ++i) {
3390 SDValue SubOp = Op.getOperand(i);
3391 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3392 APInt SubUndef, SubZero;
3393 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3394 Depth + 1))
3395 return true;
3396 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3397 KnownZero.insertBits(SubZero, i * NumSubElts);
3398 }
3399
3400 // Attempt to avoid multi-use ops if we don't need anything from them.
3401 if (!DemandedElts.isAllOnes()) {
3402 bool FoundNewSub = false;
3403 SmallVector<SDValue, 2> DemandedSubOps;
3404 for (unsigned i = 0; i != NumSubVecs; ++i) {
3405 SDValue SubOp = Op.getOperand(i);
3406 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3408 SubOp, SubElts, TLO.DAG, Depth + 1);
3409 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3410 FoundNewSub = NewSubOp ? true : FoundNewSub;
3411 }
3412 if (FoundNewSub) {
3413 SDValue NewOp =
3414 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3415 return TLO.CombineTo(Op, NewOp);
3416 }
3417 }
3418 break;
3419 }
3420 case ISD::INSERT_SUBVECTOR: {
3421 // Demand any elements from the subvector and the remainder from the src its
3422 // inserted into.
3423 SDValue Src = Op.getOperand(0);
3424 SDValue Sub = Op.getOperand(1);
3425 uint64_t Idx = Op.getConstantOperandVal(2);
3426 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3427 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3428 APInt DemandedSrcElts = DemandedElts;
3429 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3430
3431 APInt SubUndef, SubZero;
3432 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3433 Depth + 1))
3434 return true;
3435
3436 // If none of the src operand elements are demanded, replace it with undef.
3437 if (!DemandedSrcElts && !Src.isUndef())
3438 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3439 TLO.DAG.getUNDEF(VT), Sub,
3440 Op.getOperand(2)));
3441
3442 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3443 TLO, Depth + 1))
3444 return true;
3445 KnownUndef.insertBits(SubUndef, Idx);
3446 KnownZero.insertBits(SubZero, Idx);
3447
3448 // Attempt to avoid multi-use ops if we don't need anything from them.
3449 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3451 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3453 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3454 if (NewSrc || NewSub) {
3455 NewSrc = NewSrc ? NewSrc : Src;
3456 NewSub = NewSub ? NewSub : Sub;
3457 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3458 NewSub, Op.getOperand(2));
3459 return TLO.CombineTo(Op, NewOp);
3460 }
3461 }
3462 break;
3463 }
3465 // Offset the demanded elts by the subvector index.
3466 SDValue Src = Op.getOperand(0);
3467 if (Src.getValueType().isScalableVector())
3468 break;
3469 uint64_t Idx = Op.getConstantOperandVal(1);
3470 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3471 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3472
3473 APInt SrcUndef, SrcZero;
3474 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3475 Depth + 1))
3476 return true;
3477 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3478 KnownZero = SrcZero.extractBits(NumElts, Idx);
3479
3480 // Attempt to avoid multi-use ops if we don't need anything from them.
3481 if (!DemandedElts.isAllOnes()) {
3483 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3484 if (NewSrc) {
3485 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3486 Op.getOperand(1));
3487 return TLO.CombineTo(Op, NewOp);
3488 }
3489 }
3490 break;
3491 }
3493 SDValue Vec = Op.getOperand(0);
3494 SDValue Scl = Op.getOperand(1);
3495 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3496
3497 // For a legal, constant insertion index, if we don't need this insertion
3498 // then strip it, else remove it from the demanded elts.
3499 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3500 unsigned Idx = CIdx->getZExtValue();
3501 if (!DemandedElts[Idx])
3502 return TLO.CombineTo(Op, Vec);
3503
3504 APInt DemandedVecElts(DemandedElts);
3505 DemandedVecElts.clearBit(Idx);
3506 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3507 KnownZero, TLO, Depth + 1))
3508 return true;
3509
3510 KnownUndef.setBitVal(Idx, Scl.isUndef());
3511
3512 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3513 break;
3514 }
3515
3516 APInt VecUndef, VecZero;
3517 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3518 Depth + 1))
3519 return true;
3520 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3521 break;
3522 }
3523 case ISD::VSELECT: {
3524 SDValue Sel = Op.getOperand(0);
3525 SDValue LHS = Op.getOperand(1);
3526 SDValue RHS = Op.getOperand(2);
3527
3528 // Try to transform the select condition based on the current demanded
3529 // elements.
3530 APInt UndefSel, ZeroSel;
3531 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3532 Depth + 1))
3533 return true;
3534
3535 // See if we can simplify either vselect operand.
3536 APInt DemandedLHS(DemandedElts);
3537 APInt DemandedRHS(DemandedElts);
3538 APInt UndefLHS, ZeroLHS;
3539 APInt UndefRHS, ZeroRHS;
3540 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3541 Depth + 1))
3542 return true;
3543 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3544 Depth + 1))
3545 return true;
3546
3547 KnownUndef = UndefLHS & UndefRHS;
3548 KnownZero = ZeroLHS & ZeroRHS;
3549
3550 // If we know that the selected element is always zero, we don't need the
3551 // select value element.
3552 APInt DemandedSel = DemandedElts & ~KnownZero;
3553 if (DemandedSel != DemandedElts)
3554 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3555 Depth + 1))
3556 return true;
3557
3558 break;
3559 }
3560 case ISD::VECTOR_SHUFFLE: {
3561 SDValue LHS = Op.getOperand(0);
3562 SDValue RHS = Op.getOperand(1);
3563 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3564
3565 // Collect demanded elements from shuffle operands..
3566 APInt DemandedLHS(NumElts, 0);
3567 APInt DemandedRHS(NumElts, 0);
3568 for (unsigned i = 0; i != NumElts; ++i) {
3569 int M = ShuffleMask[i];
3570 if (M < 0 || !DemandedElts[i])
3571 continue;
3572 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3573 if (M < (int)NumElts)
3574 DemandedLHS.setBit(M);
3575 else
3576 DemandedRHS.setBit(M - NumElts);
3577 }
3578
3579 // If either side isn't demanded, replace it by UNDEF. We handle this
3580 // explicitly here to also simplify in case of multiple uses (on the
3581 // contrary to the SimplifyDemandedVectorElts calls below).
3582 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3583 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3584 if (FoldLHS || FoldRHS) {
3585 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3586 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3587 SDValue NewOp =
3588 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3589 return TLO.CombineTo(Op, NewOp);
3590 }
3591
3592 // See if we can simplify either shuffle operand.
3593 APInt UndefLHS, ZeroLHS;
3594 APInt UndefRHS, ZeroRHS;
3595 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3596 Depth + 1))
3597 return true;
3598 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3599 Depth + 1))
3600 return true;
3601
3602 // Simplify mask using undef elements from LHS/RHS.
3603 bool Updated = false;
3604 bool IdentityLHS = true, IdentityRHS = true;
3605 SmallVector<int, 32> NewMask(ShuffleMask);
3606 for (unsigned i = 0; i != NumElts; ++i) {
3607 int &M = NewMask[i];
3608 if (M < 0)
3609 continue;
3610 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3611 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3612 Updated = true;
3613 M = -1;
3614 }
3615 IdentityLHS &= (M < 0) || (M == (int)i);
3616 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3617 }
3618
3619 // Update legal shuffle masks based on demanded elements if it won't reduce
3620 // to Identity which can cause premature removal of the shuffle mask.
3621 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3622 SDValue LegalShuffle =
3623 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3624 if (LegalShuffle)
3625 return TLO.CombineTo(Op, LegalShuffle);
3626 }
3627
3628 // Propagate undef/zero elements from LHS/RHS.
3629 for (unsigned i = 0; i != NumElts; ++i) {
3630 int M = ShuffleMask[i];
3631 if (M < 0) {
3632 KnownUndef.setBit(i);
3633 } else if (M < (int)NumElts) {
3634 if (UndefLHS[M])
3635 KnownUndef.setBit(i);
3636 if (ZeroLHS[M])
3637 KnownZero.setBit(i);
3638 } else {
3639 if (UndefRHS[M - NumElts])
3640 KnownUndef.setBit(i);
3641 if (ZeroRHS[M - NumElts])
3642 KnownZero.setBit(i);
3643 }
3644 }
3645 break;
3646 }
3650 APInt SrcUndef, SrcZero;
3651 SDValue Src = Op.getOperand(0);
3652 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3653 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3654 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3655 Depth + 1))
3656 return true;
3657 KnownZero = SrcZero.zextOrTrunc(NumElts);
3658 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3659
3660 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3661 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3662 DemandedSrcElts == 1) {
3663 // aext - if we just need the bottom element then we can bitcast.
3664 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3665 }
3666
3667 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3668 // zext(undef) upper bits are guaranteed to be zero.
3669 if (DemandedElts.isSubsetOf(KnownUndef))
3670 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3671 KnownUndef.clearAllBits();
3672
3673 // zext - if we just need the bottom element then we can mask:
3674 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3675 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3676 Op->isOnlyUserOf(Src.getNode()) &&
3677 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3678 SDLoc DL(Op);
3679 EVT SrcVT = Src.getValueType();
3680 EVT SrcSVT = SrcVT.getScalarType();
3681 SmallVector<SDValue> MaskElts;
3682 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3683 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3684 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3685 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3686 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3687 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3688 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3689 }
3690 }
3691 }
3692 break;
3693 }
3694
3695 // TODO: There are more binop opcodes that could be handled here - MIN,
3696 // MAX, saturated math, etc.
3697 case ISD::ADD: {
3698 SDValue Op0 = Op.getOperand(0);
3699 SDValue Op1 = Op.getOperand(1);
3700 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3701 APInt UndefLHS, ZeroLHS;
3702 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3703 Depth + 1, /*AssumeSingleUse*/ true))
3704 return true;
3705 }
3706 [[fallthrough]];
3707 }
3708 case ISD::AVGCEILS:
3709 case ISD::AVGCEILU:
3710 case ISD::AVGFLOORS:
3711 case ISD::AVGFLOORU:
3712 case ISD::OR:
3713 case ISD::XOR:
3714 case ISD::SUB:
3715 case ISD::FADD:
3716 case ISD::FSUB:
3717 case ISD::FMUL:
3718 case ISD::FDIV:
3719 case ISD::FREM: {
3720 SDValue Op0 = Op.getOperand(0);
3721 SDValue Op1 = Op.getOperand(1);
3722
3723 APInt UndefRHS, ZeroRHS;
3724 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3725 Depth + 1))
3726 return true;
3727 APInt UndefLHS, ZeroLHS;
3728 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3729 Depth + 1))
3730 return true;
3731
3732 KnownZero = ZeroLHS & ZeroRHS;
3733 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3734
3735 // Attempt to avoid multi-use ops if we don't need anything from them.
3736 // TODO - use KnownUndef to relax the demandedelts?
3737 if (!DemandedElts.isAllOnes())
3738 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3739 return true;
3740 break;
3741 }
3742 case ISD::SHL:
3743 case ISD::SRL:
3744 case ISD::SRA:
3745 case ISD::ROTL:
3746 case ISD::ROTR: {
3747 SDValue Op0 = Op.getOperand(0);
3748 SDValue Op1 = Op.getOperand(1);
3749
3750 APInt UndefRHS, ZeroRHS;
3751 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3752 Depth + 1))
3753 return true;
3754 APInt UndefLHS, ZeroLHS;
3755 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3756 Depth + 1))
3757 return true;
3758
3759 KnownZero = ZeroLHS;
3760 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3761
3762 // Attempt to avoid multi-use ops if we don't need anything from them.
3763 // TODO - use KnownUndef to relax the demandedelts?
3764 if (!DemandedElts.isAllOnes())
3765 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3766 return true;
3767 break;
3768 }
3769 case ISD::MUL:
3770 case ISD::MULHU:
3771 case ISD::MULHS:
3772 case ISD::AND: {
3773 SDValue Op0 = Op.getOperand(0);
3774 SDValue Op1 = Op.getOperand(1);
3775
3776 APInt SrcUndef, SrcZero;
3777 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3778 Depth + 1))
3779 return true;
3780 // If we know that a demanded element was zero in Op1 we don't need to
3781 // demand it in Op0 - its guaranteed to be zero.
3782 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3783 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3784 TLO, Depth + 1))
3785 return true;
3786
3787 KnownUndef &= DemandedElts0;
3788 KnownZero &= DemandedElts0;
3789
3790 // If every element pair has a zero/undef then just fold to zero.
3791 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3792 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3793 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3794 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3795
3796 // If either side has a zero element, then the result element is zero, even
3797 // if the other is an UNDEF.
3798 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3799 // and then handle 'and' nodes with the rest of the binop opcodes.
3800 KnownZero |= SrcZero;
3801 KnownUndef &= SrcUndef;
3802 KnownUndef &= ~KnownZero;
3803
3804 // Attempt to avoid multi-use ops if we don't need anything from them.
3805 if (!DemandedElts.isAllOnes())
3806 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3807 return true;
3808 break;
3809 }
3810 case ISD::TRUNCATE:
3811 case ISD::SIGN_EXTEND:
3812 case ISD::ZERO_EXTEND:
3813 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3814 KnownZero, TLO, Depth + 1))
3815 return true;
3816
3817 if (!DemandedElts.isAllOnes())
3819 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3820 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3821
3822 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3823 // zext(undef) upper bits are guaranteed to be zero.
3824 if (DemandedElts.isSubsetOf(KnownUndef))
3825 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3826 KnownUndef.clearAllBits();
3827 }
3828 break;
3829 case ISD::SINT_TO_FP:
3830 case ISD::UINT_TO_FP:
3831 case ISD::FP_TO_SINT:
3832 case ISD::FP_TO_UINT:
3833 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3834 KnownZero, TLO, Depth + 1))
3835 return true;
3836 // Don't fall through to generic undef -> undef handling.
3837 return false;
3838 default: {
3839 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3840 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3841 KnownZero, TLO, Depth))
3842 return true;
3843 } else {
3844 KnownBits Known;
3845 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3846 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3847 TLO, Depth, AssumeSingleUse))
3848 return true;
3849 }
3850 break;
3851 }
3852 }
3853 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3854
3855 // Constant fold all undef cases.
3856 // TODO: Handle zero cases as well.
3857 if (DemandedElts.isSubsetOf(KnownUndef))
3858 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3859
3860 return false;
3861}
3862
3863/// Determine which of the bits specified in Mask are known to be either zero or
3864/// one and return them in the Known.
3866 KnownBits &Known,
3867 const APInt &DemandedElts,
3868 const SelectionDAG &DAG,
3869 unsigned Depth) const {
3870 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3871 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3872 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3873 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3874 "Should use MaskedValueIsZero if you don't know whether Op"
3875 " is a target node!");
3876 Known.resetAll();
3877}
3878
3881 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3882 unsigned Depth) const {
3883 Known.resetAll();
3884}
3885
3888 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3889 unsigned Depth) const {
3890 Known.resetAll();
3891}
3892
3894 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3895 // The low bits are known zero if the pointer is aligned.
3896 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3897}
3898
3904
3905/// This method can be implemented by targets that want to expose additional
3906/// information about sign bits to the DAG Combiner.
3908 const APInt &,
3909 const SelectionDAG &,
3910 unsigned Depth) const {
3911 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3912 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3913 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3914 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3915 "Should use ComputeNumSignBits if you don't know whether Op"
3916 " is a target node!");
3917 return 1;
3918}
3919
3921 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3922 const MachineRegisterInfo &MRI, unsigned Depth) const {
3923 return 1;
3924}
3925
3927 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3928 TargetLoweringOpt &TLO, unsigned Depth) const {
3929 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3930 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3931 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3932 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3933 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3934 " is a target node!");
3935 return false;
3936}
3937
3939 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3940 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3941 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3942 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3943 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3944 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3945 "Should use SimplifyDemandedBits if you don't know whether Op"
3946 " is a target node!");
3947 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3948 return false;
3949}
3950
3952 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3953 SelectionDAG &DAG, unsigned Depth) const {
3954 assert(
3955 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3956 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3957 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3958 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3959 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3960 " is a target node!");
3961 return SDValue();
3962}
3963
3964SDValue
3967 SelectionDAG &DAG) const {
3968 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3969 if (!LegalMask) {
3970 std::swap(N0, N1);
3972 LegalMask = isShuffleMaskLegal(Mask, VT);
3973 }
3974
3975 if (!LegalMask)
3976 return SDValue();
3977
3978 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3979}
3980
3982 return nullptr;
3983}
3984
3986 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3987 bool PoisonOnly, unsigned Depth) const {
3988 assert(
3989 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3990 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3991 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3992 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3993 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3994 " is a target node!");
3995
3996 // If Op can't create undef/poison and none of its operands are undef/poison
3997 // then Op is never undef/poison.
3998 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3999 /*ConsiderFlags*/ true, Depth) &&
4000 all_of(Op->ops(), [&](SDValue V) {
4001 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4002 Depth + 1);
4003 });
4004}
4005
4007 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4008 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4009 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4010 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4011 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4012 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4013 "Should use canCreateUndefOrPoison if you don't know whether Op"
4014 " is a target node!");
4015 // Be conservative and return true.
4016 return true;
4017}
4018
4020 const APInt &DemandedElts,
4021 const SelectionDAG &DAG,
4022 bool SNaN,
4023 unsigned Depth) const {
4024 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4025 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4026 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4027 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4028 "Should use isKnownNeverNaN if you don't know whether Op"
4029 " is a target node!");
4030 return false;
4031}
4032
4034 const APInt &DemandedElts,
4035 APInt &UndefElts,
4036 const SelectionDAG &DAG,
4037 unsigned Depth) const {
4038 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4039 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4040 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4041 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4042 "Should use isSplatValue if you don't know whether Op"
4043 " is a target node!");
4044 return false;
4045}
4046
4047// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4048// work with truncating build vectors and vectors with elements of less than
4049// 8 bits.
4051 if (!N)
4052 return false;
4053
4054 unsigned EltWidth;
4055 APInt CVal;
4056 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4057 /*AllowTruncation=*/true)) {
4058 CVal = CN->getAPIntValue();
4059 EltWidth = N.getValueType().getScalarSizeInBits();
4060 } else
4061 return false;
4062
4063 // If this is a truncating splat, truncate the splat value.
4064 // Otherwise, we may fail to match the expected values below.
4065 if (EltWidth < CVal.getBitWidth())
4066 CVal = CVal.trunc(EltWidth);
4067
4068 switch (getBooleanContents(N.getValueType())) {
4070 return CVal[0];
4072 return CVal.isOne();
4074 return CVal.isAllOnes();
4075 }
4076
4077 llvm_unreachable("Invalid boolean contents");
4078}
4079
4081 if (!N)
4082 return false;
4083
4085 if (!CN) {
4087 if (!BV)
4088 return false;
4089
4090 // Only interested in constant splats, we don't care about undef
4091 // elements in identifying boolean constants and getConstantSplatNode
4092 // returns NULL if all ops are undef;
4093 CN = BV->getConstantSplatNode();
4094 if (!CN)
4095 return false;
4096 }
4097
4098 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4099 return !CN->getAPIntValue()[0];
4100
4101 return CN->isZero();
4102}
4103
4105 bool SExt) const {
4106 if (VT == MVT::i1)
4107 return N->isOne();
4108
4110 switch (Cnt) {
4112 // An extended value of 1 is always true, unless its original type is i1,
4113 // in which case it will be sign extended to -1.
4114 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4117 return N->isAllOnes() && SExt;
4118 }
4119 llvm_unreachable("Unexpected enumeration.");
4120}
4121
4122/// This helper function of SimplifySetCC tries to optimize the comparison when
4123/// either operand of the SetCC node is a bitwise-and instruction.
4124SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4125 ISD::CondCode Cond, const SDLoc &DL,
4126 DAGCombinerInfo &DCI) const {
4127 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4128 std::swap(N0, N1);
4129
4130 SelectionDAG &DAG = DCI.DAG;
4131 EVT OpVT = N0.getValueType();
4132 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4133 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4134 return SDValue();
4135
4136 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4137 // iff everything but LSB is known zero:
4138 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4141 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4142 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4143 if (DAG.MaskedValueIsZero(N0, UpperBits))
4144 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4145 }
4146
4147 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4148 // test in a narrow type that we can truncate to with no cost. Examples:
4149 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4150 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4151 // TODO: This conservatively checks for type legality on the source and
4152 // destination types. That may inhibit optimizations, but it also
4153 // allows setcc->shift transforms that may be more beneficial.
4154 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4155 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4156 isTypeLegal(OpVT) && N0.hasOneUse()) {
4157 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4158 AndC->getAPIntValue().getActiveBits());
4159 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4160 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4161 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4162 return DAG.getSetCC(DL, VT, Trunc, Zero,
4164 }
4165 }
4166
4167 // Match these patterns in any of their permutations:
4168 // (X & Y) == Y
4169 // (X & Y) != Y
4170 SDValue X, Y;
4171 if (N0.getOperand(0) == N1) {
4172 X = N0.getOperand(1);
4173 Y = N0.getOperand(0);
4174 } else if (N0.getOperand(1) == N1) {
4175 X = N0.getOperand(0);
4176 Y = N0.getOperand(1);
4177 } else {
4178 return SDValue();
4179 }
4180
4181 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4182 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4183 // its liable to create and infinite loop.
4184 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4185 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4187 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4188 // Note that where Y is variable and is known to have at most one bit set
4189 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4190 // equivalent when Y == 0.
4191 assert(OpVT.isInteger());
4193 if (DCI.isBeforeLegalizeOps() ||
4195 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4196 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4197 // If the target supports an 'and-not' or 'and-complement' logic operation,
4198 // try to use that to make a comparison operation more efficient.
4199 // But don't do this transform if the mask is a single bit because there are
4200 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4201 // 'rlwinm' on PPC).
4202
4203 // Bail out if the compare operand that we want to turn into a zero is
4204 // already a zero (otherwise, infinite loop).
4205 if (isNullConstant(Y))
4206 return SDValue();
4207
4208 // Transform this into: ~X & Y == 0.
4209 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4210 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4211 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4212 }
4213
4214 return SDValue();
4215}
4216
4217/// This helper function of SimplifySetCC tries to optimize the comparison when
4218/// either operand of the SetCC node is a bitwise-or instruction.
4219/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4220SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4221 ISD::CondCode Cond, const SDLoc &DL,
4222 DAGCombinerInfo &DCI) const {
4223 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4224 std::swap(N0, N1);
4225
4226 SelectionDAG &DAG = DCI.DAG;
4227 EVT OpVT = N0.getValueType();
4228 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4229 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4230 return SDValue();
4231
4232 // (X | Y) == Y
4233 // (X | Y) != Y
4234 SDValue X;
4235 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4236 // If the target supports an 'and-not' or 'and-complement' logic operation,
4237 // try to use that to make a comparison operation more efficient.
4238
4239 // Bail out if the compare operand that we want to turn into a zero is
4240 // already a zero (otherwise, infinite loop).
4241 if (isNullConstant(N1))
4242 return SDValue();
4243
4244 // Transform this into: X & ~Y ==/!= 0.
4245 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4246 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4247 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4248 }
4249
4250 return SDValue();
4251}
4252
4253/// There are multiple IR patterns that could be checking whether certain
4254/// truncation of a signed number would be lossy or not. The pattern which is
4255/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4256/// We are looking for the following pattern: (KeptBits is a constant)
4257/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4258/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4259/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4260/// We will unfold it into the natural trunc+sext pattern:
4261/// ((%x << C) a>> C) dstcond %x
4262/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4263SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4264 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4265 const SDLoc &DL) const {
4266 // We must be comparing with a constant.
4267 ConstantSDNode *C1;
4268 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4269 return SDValue();
4270
4271 // N0 should be: add %x, (1 << (KeptBits-1))
4272 if (N0->getOpcode() != ISD::ADD)
4273 return SDValue();
4274
4275 // And we must be 'add'ing a constant.
4276 ConstantSDNode *C01;
4277 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4278 return SDValue();
4279
4280 SDValue X = N0->getOperand(0);
4281 EVT XVT = X.getValueType();
4282
4283 // Validate constants ...
4284
4285 APInt I1 = C1->getAPIntValue();
4286
4287 ISD::CondCode NewCond;
4288 if (Cond == ISD::CondCode::SETULT) {
4289 NewCond = ISD::CondCode::SETEQ;
4290 } else if (Cond == ISD::CondCode::SETULE) {
4291 NewCond = ISD::CondCode::SETEQ;
4292 // But need to 'canonicalize' the constant.
4293 I1 += 1;
4294 } else if (Cond == ISD::CondCode::SETUGT) {
4295 NewCond = ISD::CondCode::SETNE;
4296 // But need to 'canonicalize' the constant.
4297 I1 += 1;
4298 } else if (Cond == ISD::CondCode::SETUGE) {
4299 NewCond = ISD::CondCode::SETNE;
4300 } else
4301 return SDValue();
4302
4303 APInt I01 = C01->getAPIntValue();
4304
4305 auto checkConstants = [&I1, &I01]() -> bool {
4306 // Both of them must be power-of-two, and the constant from setcc is bigger.
4307 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4308 };
4309
4310 if (checkConstants()) {
4311 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4312 } else {
4313 // What if we invert constants? (and the target predicate)
4314 I1.negate();
4315 I01.negate();
4316 assert(XVT.isInteger());
4317 NewCond = getSetCCInverse(NewCond, XVT);
4318 if (!checkConstants())
4319 return SDValue();
4320 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4321 }
4322
4323 // They are power-of-two, so which bit is set?
4324 const unsigned KeptBits = I1.logBase2();
4325 const unsigned KeptBitsMinusOne = I01.logBase2();
4326
4327 // Magic!
4328 if (KeptBits != (KeptBitsMinusOne + 1))
4329 return SDValue();
4330 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4331
4332 // We don't want to do this in every single case.
4333 SelectionDAG &DAG = DCI.DAG;
4334 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4335 return SDValue();
4336
4337 // Unfold into: sext_inreg(%x) cond %x
4338 // Where 'cond' will be either 'eq' or 'ne'.
4339 SDValue SExtInReg = DAG.getNode(
4341 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4342 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4343}
4344
4345// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4346SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4347 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4348 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4350 "Should be a comparison with 0.");
4351 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4352 "Valid only for [in]equality comparisons.");
4353
4354 unsigned NewShiftOpcode;
4355 SDValue X, C, Y;
4356
4357 SelectionDAG &DAG = DCI.DAG;
4358
4359 // Look for '(C l>>/<< Y)'.
4360 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4361 // The shift should be one-use.
4362 if (!V.hasOneUse())
4363 return false;
4364 unsigned OldShiftOpcode = V.getOpcode();
4365 switch (OldShiftOpcode) {
4366 case ISD::SHL:
4367 NewShiftOpcode = ISD::SRL;
4368 break;
4369 case ISD::SRL:
4370 NewShiftOpcode = ISD::SHL;
4371 break;
4372 default:
4373 return false; // must be a logical shift.
4374 }
4375 // We should be shifting a constant.
4376 // FIXME: best to use isConstantOrConstantVector().
4377 C = V.getOperand(0);
4378 ConstantSDNode *CC =
4379 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4380 if (!CC)
4381 return false;
4382 Y = V.getOperand(1);
4383
4384 ConstantSDNode *XC =
4385 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4387 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4388 };
4389
4390 // LHS of comparison should be an one-use 'and'.
4391 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4392 return SDValue();
4393
4394 X = N0.getOperand(0);
4395 SDValue Mask = N0.getOperand(1);
4396
4397 // 'and' is commutative!
4398 if (!Match(Mask)) {
4399 std::swap(X, Mask);
4400 if (!Match(Mask))
4401 return SDValue();
4402 }
4403
4404 EVT VT = X.getValueType();
4405
4406 // Produce:
4407 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4408 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4409 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4410 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4411 return T2;
4412}
4413
4414/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4415/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4416/// handle the commuted versions of these patterns.
4417SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4418 ISD::CondCode Cond, const SDLoc &DL,
4419 DAGCombinerInfo &DCI) const {
4420 unsigned BOpcode = N0.getOpcode();
4421 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4422 "Unexpected binop");
4423 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4424
4425 // (X + Y) == X --> Y == 0
4426 // (X - Y) == X --> Y == 0
4427 // (X ^ Y) == X --> Y == 0
4428 SelectionDAG &DAG = DCI.DAG;
4429 EVT OpVT = N0.getValueType();
4430 SDValue X = N0.getOperand(0);
4431 SDValue Y = N0.getOperand(1);
4432 if (X == N1)
4433 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4434
4435 if (Y != N1)
4436 return SDValue();
4437
4438 // (X + Y) == Y --> X == 0
4439 // (X ^ Y) == Y --> X == 0
4440 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4441 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4442
4443 // The shift would not be valid if the operands are boolean (i1).
4444 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4445 return SDValue();
4446
4447 // (X - Y) == Y --> X == Y << 1
4448 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4449 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4450 if (!DCI.isCalledByLegalizer())
4451 DCI.AddToWorklist(YShl1.getNode());
4452 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4453}
4454
// simplifySetCCWithCTPOP: fold a setcc whose LHS is (possibly a truncate of)
// a single-use CTPOP node into cheaper bit-manipulation tricks so the
// popcount itself can be eliminated. Returns a null SDValue if no fold
// applies.
// NOTE(review): this rendering dropped several physical lines (the signature
// start 4455 and lines 4463/4494/4522); comments below describe only the
// code that is visible here.
4456 SDValue N0, const APInt &C1,
4457 ISD::CondCode Cond, const SDLoc &dl,
4458 SelectionDAG &DAG) {
4459 // Look through truncs that don't change the value of a ctpop.
4460 // FIXME: Add vector support? Need to be careful with setcc result type below.
4461 SDValue CTPOP = N0;
// Strip the truncate only for one-use scalar nodes; the elided continuation
// of this condition (line 4463) presumably verifies the truncation preserves
// the ctpop value — TODO confirm against upstream.
4462 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4464 CTPOP = N0.getOperand(0);
4465
// Bail unless the (possibly unwrapped) node really is a single-use ctpop.
4466 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4467 return SDValue();
4468
// CTOp is X, the value whose set bits are being counted.
4469 EVT CTVT = CTPOP.getValueType();
4470 SDValue CTOp = CTPOP.getOperand(0);
4471
4472 // Expand a power-of-2-or-zero comparison based on ctpop:
4473 // (ctpop x) u< 2 -> (x & x-1) == 0
4474 // (ctpop x) u> 1 -> (x & x-1) != 0
4475 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4476 // Keep the CTPOP if it is a cheap vector op.
4477 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4478 return SDValue();
4479
// Cost model: the target caps how many clear-lowest-set-bit iterations are
// worth emitting in place of a real popcount.
4480 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4481 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4482 return SDValue();
// (ctpop x) u< 0 is always false and folded elsewhere; don't handle it here.
4483 if (C1 == 0 && (Cond == ISD::SETULT))
4484 return SDValue(); // This is handled elsewhere.
4485
// Number of (x &= x-1) passes: one fewer for SETULT than for SETUGT, since
// "u< C1" tolerates up to C1-1 set bits while "u> C1" must clear C1 of them.
4486 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4487
// Repeatedly clear the lowest set bit: Result &= (Result + (-1)) == Result & (Result - 1).
4488 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4489 SDValue Result = CTOp;
4490 for (unsigned i = 0; i < Passes; i++) {
4491 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4492 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4493 }
// NOTE(review): 'CC' is defined on a line elided from this rendering (4494);
// presumably SETEQ for the u< case and SETNE for the u> case — TODO confirm.
4495 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4496 }
4497
4498 // Expand a power-of-2 comparison based on ctpop
4499 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4500 // Keep the CTPOP if it is cheap.
4501 if (TLI.isCtpopFast(CTVT))
4502 return SDValue();
4503
4504 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4505 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4506 assert(CTVT.isInteger());
// Add = x - 1 (adding the all-ones constant subtracts one).
4507 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4508
4509 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4510 // check before emitting a potentially unnecessary op.
4511 if (DAG.isKnownNeverZero(CTOp)) {
4512 // (ctpop x) == 1 --> (x & x-1) == 0
4513 // (ctpop x) != 1 --> (x & x-1) != 0
4514 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4515 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4516 return RHS;
4517 }
4518
// General case must distinguish x == 0 (where x & x-1 is also 0), hence the
// xor/compare form below instead of the simple and/compare above.
4519 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4520 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4521 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
// NOTE(review): 'CmpCond' is defined on an elided line (4522); per the
// comment above it should be UGT for SETEQ and ULE for SETNE — TODO confirm.
4523 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4524 }
4525
4526 return SDValue();
4527}
4528
// foldSetCCWithRotate: equality/inequality comparisons against 0 or -1 can
// look through rotates, because rotating a value preserves the all-zeros and
// all-ones states. Returns a null SDValue if no fold applies.
// NOTE(review): the signature's first physical line (4529) was dropped by
// this rendering.
4530 ISD::CondCode Cond, const SDLoc &dl,
4531 SelectionDAG &DAG) {
// Only equality-style comparisons are handled here.
4532 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4533 return SDValue();
4534
// RHS must be a constant 0 or -1 (splat for vectors; undef lanes allowed).
4535 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4536 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4537 return SDValue();
4538
// Helper: if X is a rotate, return the value being rotated, else null.
4539 auto getRotateSource = [](SDValue X) {
4540 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4541 return X.getOperand(0);
4542 return SDValue();
4543 };
4544
4545 // Peek through a rotated value compared against 0 or -1:
4546 // (rot X, Y) == 0/-1 --> X == 0/-1
4547 // (rot X, Y) != 0/-1 --> X != 0/-1
4548 if (SDValue R = getRotateSource(N0))
4549 return DAG.getSetCC(dl, VT, R, N1, Cond);
4550
4551 // Peek through an 'or' of a rotated value compared against 0:
4552 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4553 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4554 //
4555 // TODO: Add the 'and' with -1 sibling.
4556 // TODO: Recurse through a series of 'or' ops to find the rotate.
4557 EVT OpVT = N0.getValueType();
// Note this 'or' variant is restricted to compares against zero (the -1
// case would need the 'and' sibling mentioned in the TODO above).
4558 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4559 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4560 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4561 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4562 }
4563 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4564 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4565 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4566 }
4567 }
4568
4569 return SDValue();
4570}
4571
// foldSetCCWithFunnelShift: rewrite (fshl/fshr ...) ==/!= 0, where one
// funnel-shift input is an 'or' containing the other input, using a single
// plain shift — testing for all-bits-clear does not depend on bit order.
// Returns a null SDValue if no fold applies.
// NOTE(review): the signature's first physical line (4572) was dropped by
// this rendering.
4573 ISD::CondCode Cond, const SDLoc &dl,
4574 SelectionDAG &DAG) {
4575 // If we are testing for all-bits-clear, we might be able to do that with
4576 // less shifting since bit-order does not matter.
4577 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4578 return SDValue();
4579
// Only compares against zero (splat for vectors; undef lanes allowed).
4580 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4581 if (!C1 || !C1->isZero())
4582 return SDValue();
4583
4584 if (!N0.hasOneUse() ||
4585 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4586 return SDValue();
4587
4588 unsigned BitWidth = N0.getScalarValueSizeInBits();
// The shift amount must be a constant (or splat) to reason about it here.
4589 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4590 if (!ShAmtC)
4591 return SDValue();
4592
// Funnel-shift amounts wrap modulo the bit width; an amount of 0 is a no-op
// that generic folds handle, so skip it.
4593 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4594 if (ShAmt == 0)
4595 return SDValue();
4596
4597 // Canonicalize fshr as fshl to reduce pattern-matching.
4598 if (N0.getOpcode() == ISD::FSHR)
4599 ShAmt = BitWidth - ShAmt;
4600
4601 // Match an 'or' with a specific operand 'Other' in either commuted variant.
// On success, X is bound to the operand shared with 'Other' and Y to the
// other input of the 'or'.
4602 SDValue X, Y;
4603 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4604 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4605 return false;
4606 if (Or.getOperand(0) == Other) {
4607 X = Or.getOperand(0);
4608 Y = Or.getOperand(1);
4609 return true;
4610 }
4611 if (Or.getOperand(1) == Other) {
4612 X = Or.getOperand(1);
4613 Y = Or.getOperand(0);
4614 return true;
4615 }
4616 return false;
4617 };
4618
// F0/F1 are the high/low funnel inputs; try the 'or' on either side.
4619 EVT OpVT = N0.getValueType();
4620 EVT ShAmtVT = N0.getOperand(2).getValueType();
4621 SDValue F0 = N0.getOperand(0);
4622 SDValue F1 = N0.getOperand(1);
4623 if (matchOr(F0, F1)) {
4624 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4625 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4626 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4627 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4628 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4629 }
4630 if (matchOr(F1, F0)) {
4631 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4632 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4633 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4634 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4635 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4636 }
4637
4638 return SDValue();
4639}
4640
4641/// Try to simplify a setcc built with the specified operands and cc. If it is
4642/// unable to simplify it, return a null SDValue.
4644 ISD::CondCode Cond, bool foldBooleans,
4645 DAGCombinerInfo &DCI,
4646 const SDLoc &dl) const {
4647 SelectionDAG &DAG = DCI.DAG;
4648 const DataLayout &Layout = DAG.getDataLayout();
4649 EVT OpVT = N0.getValueType();
4650 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4651
4652 // Constant fold or commute setcc.
4653 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4654 return Fold;
4655
4656 bool N0ConstOrSplat =
4657 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4658 bool N1ConstOrSplat =
4659 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4660
4661 // Canonicalize toward having the constant on the RHS.
4662 // TODO: Handle non-splat vector constants. All undef causes trouble.
4663 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4664 // infinite loop here when we encounter one.
4666 if (N0ConstOrSplat && !N1ConstOrSplat &&
4667 (DCI.isBeforeLegalizeOps() ||
4668 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4669 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4670
4671 // If we have a subtract with the same 2 non-constant operands as this setcc
4672 // -- but in reverse order -- then try to commute the operands of this setcc
4673 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4674 // instruction on some targets.
4675 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4676 (DCI.isBeforeLegalizeOps() ||
4677 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4678 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4679 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4680 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4681
4682 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4683 return V;
4684
4685 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4686 return V;
4687
4688 if (auto *N1C = isConstOrConstSplat(N1)) {
4689 const APInt &C1 = N1C->getAPIntValue();
4690
4691 // Optimize some CTPOP cases.
4692 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4693 return V;
4694
4695 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4696 // X * Y == 0 --> (X == 0) || (Y == 0)
4697 // X * Y != 0 --> (X != 0) && (Y != 0)
4698 // TODO: This bails out if minsize is set, but if the target doesn't have a
4699 // single instruction multiply for this type, it would likely be
4700 // smaller to decompose.
4701 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4702 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4703 (N0->getFlags().hasNoUnsignedWrap() ||
4704 N0->getFlags().hasNoSignedWrap()) &&
4705 !Attr.hasFnAttr(Attribute::MinSize)) {
4706 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4707 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4708 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4709 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4710 }
4711
4712 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4713 // equality comparison, then we're just comparing whether X itself is
4714 // zero.
4715 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4716 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4718 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4719 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4720 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4721 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4722 // (srl (ctlz x), 5) == 0 -> X != 0
4723 // (srl (ctlz x), 5) != 1 -> X != 0
4724 Cond = ISD::SETNE;
4725 } else {
4726 // (srl (ctlz x), 5) != 0 -> X == 0
4727 // (srl (ctlz x), 5) == 1 -> X == 0
4728 Cond = ISD::SETEQ;
4729 }
4730 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4731 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4732 Cond);
4733 }
4734 }
4735 }
4736 }
4737
4738 // FIXME: Support vectors.
4739 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4740 const APInt &C1 = N1C->getAPIntValue();
4741
4742 // (zext x) == C --> x == (trunc C)
4743 // (sext x) == C --> x == (trunc C)
4744 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4745 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4746 unsigned MinBits = N0.getValueSizeInBits();
4747 SDValue PreExt;
4748 bool Signed = false;
4749 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4750 // ZExt
4751 MinBits = N0->getOperand(0).getValueSizeInBits();
4752 PreExt = N0->getOperand(0);
4753 } else if (N0->getOpcode() == ISD::AND) {
4754 // DAGCombine turns costly ZExts into ANDs
4755 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4756 if ((C->getAPIntValue()+1).isPowerOf2()) {
4757 MinBits = C->getAPIntValue().countr_one();
4758 PreExt = N0->getOperand(0);
4759 }
4760 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4761 // SExt
4762 MinBits = N0->getOperand(0).getValueSizeInBits();
4763 PreExt = N0->getOperand(0);
4764 Signed = true;
4765 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4766 // ZEXTLOAD / SEXTLOAD
4767 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4768 MinBits = LN0->getMemoryVT().getSizeInBits();
4769 PreExt = N0;
4770 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4771 Signed = true;
4772 MinBits = LN0->getMemoryVT().getSizeInBits();
4773 PreExt = N0;
4774 }
4775 }
4776
4777 // Figure out how many bits we need to preserve this constant.
4778 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4779
4780 // Make sure we're not losing bits from the constant.
4781 if (MinBits > 0 &&
4782 MinBits < C1.getBitWidth() &&
4783 MinBits >= ReqdBits) {
4784 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4785 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4786 // Will get folded away.
4787 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4788 if (MinBits == 1 && C1 == 1)
4789 // Invert the condition.
4790 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4792 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4793 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4794 }
4795
4796 // If truncating the setcc operands is not desirable, we can still
4797 // simplify the expression in some cases:
4798 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4799 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4800 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4801 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4802 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4803 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4804 SDValue TopSetCC = N0->getOperand(0);
4805 unsigned N0Opc = N0->getOpcode();
4806 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4807 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4808 TopSetCC.getOpcode() == ISD::SETCC &&
4809 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4810 (isConstFalseVal(N1) ||
4811 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4812
4813 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4814 (!N1C->isZero() && Cond == ISD::SETNE);
4815
4816 if (!Inverse)
4817 return TopSetCC;
4818
4820 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4821 TopSetCC.getOperand(0).getValueType());
4822 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4823 TopSetCC.getOperand(1),
4824 InvCond);
4825 }
4826 }
4827 }
4828
4829 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4830 // equality or unsigned, and all 1 bits of the const are in the same
4831 // partial word, see if we can shorten the load.
4832 if (DCI.isBeforeLegalize() &&
4834 N0.getOpcode() == ISD::AND && C1 == 0 &&
4835 N0.getNode()->hasOneUse() &&
4836 isa<LoadSDNode>(N0.getOperand(0)) &&
4837 N0.getOperand(0).getNode()->hasOneUse() &&
4839 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4840 APInt bestMask;
4841 unsigned bestWidth = 0, bestOffset = 0;
4842 if (Lod->isSimple() && Lod->isUnindexed() &&
4843 (Lod->getMemoryVT().isByteSized() ||
4844 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4845 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4846 unsigned origWidth = N0.getValueSizeInBits();
4847 unsigned maskWidth = origWidth;
4848 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4849 // 8 bits, but have to be careful...
4850 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4851 origWidth = Lod->getMemoryVT().getSizeInBits();
4852 const APInt &Mask = N0.getConstantOperandAPInt(1);
4853 // Only consider power-of-2 widths (and at least one byte) as candiates
4854 // for the narrowed load.
4855 for (unsigned width = 8; width < origWidth; width *= 2) {
4856 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4857 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4858 // Avoid accessing any padding here for now (we could use memWidth
4859 // instead of origWidth here otherwise).
4860 unsigned maxOffset = origWidth - width;
4861 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4862 if (Mask.isSubsetOf(newMask)) {
4863 unsigned ptrOffset =
4864 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4865 unsigned IsFast = 0;
4866 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4867 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4869 ptrOffset / 8) &&
4871 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4872 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4873 IsFast) {
4874 bestOffset = ptrOffset / 8;
4875 bestMask = Mask.lshr(offset);
4876 bestWidth = width;
4877 break;
4878 }
4879 }
4880 newMask <<= 8;
4881 }
4882 if (bestWidth)
4883 break;
4884 }
4885 }
4886 if (bestWidth) {
4887 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4888 SDValue Ptr = Lod->getBasePtr();
4889 if (bestOffset != 0)
4890 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4891 SDValue NewLoad =
4892 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4893 Lod->getPointerInfo().getWithOffset(bestOffset),
4894 Lod->getBaseAlign());
4895 SDValue And =
4896 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4897 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4898 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4899 }
4900 }
4901
4902 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4903 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4904 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4905
4906 // If the comparison constant has bits in the upper part, the
4907 // zero-extended value could never match.
4909 C1.getBitWidth() - InSize))) {
4910 switch (Cond) {
4911 case ISD::SETUGT:
4912 case ISD::SETUGE:
4913 case ISD::SETEQ:
4914 return DAG.getConstant(0, dl, VT);
4915 case ISD::SETULT:
4916 case ISD::SETULE:
4917 case ISD::SETNE:
4918 return DAG.getConstant(1, dl, VT);
4919 case ISD::SETGT:
4920 case ISD::SETGE:
4921 // True if the sign bit of C1 is set.
4922 return DAG.getConstant(C1.isNegative(), dl, VT);
4923 case ISD::SETLT:
4924 case ISD::SETLE:
4925 // True if the sign bit of C1 isn't set.
4926 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4927 default:
4928 break;
4929 }
4930 }
4931
4932 // Otherwise, we can perform the comparison with the low bits.
4933 switch (Cond) {
4934 case ISD::SETEQ:
4935 case ISD::SETNE:
4936 case ISD::SETUGT:
4937 case ISD::SETUGE:
4938 case ISD::SETULT:
4939 case ISD::SETULE: {
4940 EVT newVT = N0.getOperand(0).getValueType();
4941 // FIXME: Should use isNarrowingProfitable.
4942 if (DCI.isBeforeLegalizeOps() ||
4943 (isOperationLegal(ISD::SETCC, newVT) &&
4944 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4946 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4947 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4948
4949 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4950 NewConst, Cond);
4951 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4952 }
4953 break;
4954 }
4955 default:
4956 break; // todo, be more careful with signed comparisons
4957 }
4958 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4959 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4961 OpVT)) {
4962 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4963 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4964 EVT ExtDstTy = N0.getValueType();
4965 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4966
4967 // If the constant doesn't fit into the number of bits for the source of
4968 // the sign extension, it is impossible for both sides to be equal.
4969 if (C1.getSignificantBits() > ExtSrcTyBits)
4970 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4971
4972 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4973 ExtDstTy != ExtSrcTy && "Unexpected types!");
4974 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4975 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4976 DAG.getConstant(Imm, dl, ExtDstTy));
4977 if (!DCI.isCalledByLegalizer())
4978 DCI.AddToWorklist(ZextOp.getNode());
4979 // Otherwise, make this a use of a zext.
4980 return DAG.getSetCC(dl, VT, ZextOp,
4981 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4982 } else if ((N1C->isZero() || N1C->isOne()) &&
4983 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4984 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4985 // excluded as they are handled below whilst checking for foldBooleans.
4986 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4987 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4988 (N0.getValueType() == MVT::i1 ||
4992 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4993 if (TrueWhenTrue)
4994 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4995 // Invert the condition.
4996 if (N0.getOpcode() == ISD::SETCC) {
4999 if (DCI.isBeforeLegalizeOps() ||
5001 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5002 }
5003 }
5004
5005 if ((N0.getOpcode() == ISD::XOR ||
5006 (N0.getOpcode() == ISD::AND &&
5007 N0.getOperand(0).getOpcode() == ISD::XOR &&
5008 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5009 isOneConstant(N0.getOperand(1))) {
5010 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5011 // can only do this if the top bits are known zero.
5012 unsigned BitWidth = N0.getValueSizeInBits();
5013 if (DAG.MaskedValueIsZero(N0,
5015 BitWidth-1))) {
5016 // Okay, get the un-inverted input value.
5017 SDValue Val;
5018 if (N0.getOpcode() == ISD::XOR) {
5019 Val = N0.getOperand(0);
5020 } else {
5021 assert(N0.getOpcode() == ISD::AND &&
5022 N0.getOperand(0).getOpcode() == ISD::XOR);
5023 // ((X^1)&1)^1 -> X & 1
5024 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5025 N0.getOperand(0).getOperand(0),
5026 N0.getOperand(1));
5027 }
5028
5029 return DAG.getSetCC(dl, VT, Val, N1,
5031 }
5032 } else if (N1C->isOne()) {
5033 SDValue Op0 = N0;
5034 if (Op0.getOpcode() == ISD::TRUNCATE)
5035 Op0 = Op0.getOperand(0);
5036
5037 if ((Op0.getOpcode() == ISD::XOR) &&
5038 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5039 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5040 SDValue XorLHS = Op0.getOperand(0);
5041 SDValue XorRHS = Op0.getOperand(1);
5042 // Ensure that the input setccs return an i1 type or 0/1 value.
5043 if (Op0.getValueType() == MVT::i1 ||
5048 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5050 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5051 }
5052 }
5053 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5054 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5055 if (Op0.getValueType().bitsGT(VT))
5056 Op0 = DAG.getNode(ISD::AND, dl, VT,
5057 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5058 DAG.getConstant(1, dl, VT));
5059 else if (Op0.getValueType().bitsLT(VT))
5060 Op0 = DAG.getNode(ISD::AND, dl, VT,
5061 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5062 DAG.getConstant(1, dl, VT));
5063
5064 return DAG.getSetCC(dl, VT, Op0,
5065 DAG.getConstant(0, dl, Op0.getValueType()),
5067 }
5068 if (Op0.getOpcode() == ISD::AssertZext &&
5069 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5070 return DAG.getSetCC(dl, VT, Op0,
5071 DAG.getConstant(0, dl, Op0.getValueType()),
5073 }
5074 }
5075
5076 // Given:
5077 // icmp eq/ne (urem %x, %y), 0
5078 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5079 // icmp eq/ne %x, 0
5080 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5081 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5082 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5083 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5084 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5085 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5086 }
5087
5088 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5089 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5090 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5092 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5093 N1C->isAllOnes()) {
5094 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5095 DAG.getConstant(0, dl, OpVT),
5097 }
5098
5099 // fold (setcc (trunc x) c) -> (setcc x c)
5100 if (N0.getOpcode() == ISD::TRUNCATE &&
5102 (N0->getFlags().hasNoSignedWrap() &&
5105 EVT NewVT = N0.getOperand(0).getValueType();
5106 SDValue NewConst = DAG.getConstant(
5108 ? C1.sext(NewVT.getSizeInBits())
5109 : C1.zext(NewVT.getSizeInBits()),
5110 dl, NewVT);
5111 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5112 }
5113
5114 if (SDValue V =
5115 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5116 return V;
5117 }
5118
5119 // These simplifications apply to splat vectors as well.
5120 // TODO: Handle more splat vector cases.
5121 if (auto *N1C = isConstOrConstSplat(N1)) {
5122 const APInt &C1 = N1C->getAPIntValue();
5123
5124 APInt MinVal, MaxVal;
5125 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5127 MinVal = APInt::getSignedMinValue(OperandBitSize);
5128 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5129 } else {
5130 MinVal = APInt::getMinValue(OperandBitSize);
5131 MaxVal = APInt::getMaxValue(OperandBitSize);
5132 }
5133
5134 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5135 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5136 // X >= MIN --> true
5137 if (C1 == MinVal)
5138 return DAG.getBoolConstant(true, dl, VT, OpVT);
5139
5140 if (!VT.isVector()) { // TODO: Support this for vectors.
5141 // X >= C0 --> X > (C0 - 1)
5142 APInt C = C1 - 1;
5144 if ((DCI.isBeforeLegalizeOps() ||
5145 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5146 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5147 isLegalICmpImmediate(C.getSExtValue())))) {
5148 return DAG.getSetCC(dl, VT, N0,
5149 DAG.getConstant(C, dl, N1.getValueType()),
5150 NewCC);
5151 }
5152 }
5153 }
5154
5155 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5156 // X <= MAX --> true
5157 if (C1 == MaxVal)
5158 return DAG.getBoolConstant(true, dl, VT, OpVT);
5159
5160 // X <= C0 --> X < (C0 + 1)
5161 if (!VT.isVector()) { // TODO: Support this for vectors.
5162 APInt C = C1 + 1;
5164 if ((DCI.isBeforeLegalizeOps() ||
5165 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5166 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5167 isLegalICmpImmediate(C.getSExtValue())))) {
5168 return DAG.getSetCC(dl, VT, N0,
5169 DAG.getConstant(C, dl, N1.getValueType()),
5170 NewCC);
5171 }
5172 }
5173 }
5174
5175 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5176 if (C1 == MinVal)
5177 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5178
5179 // TODO: Support this for vectors after legalize ops.
5180 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5181 // Canonicalize setlt X, Max --> setne X, Max
5182 if (C1 == MaxVal)
5183 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5184
5185 // If we have setult X, 1, turn it into seteq X, 0
5186 if (C1 == MinVal+1)
5187 return DAG.getSetCC(dl, VT, N0,
5188 DAG.getConstant(MinVal, dl, N0.getValueType()),
5189 ISD::SETEQ);
5190 }
5191 }
5192
5193 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5194 if (C1 == MaxVal)
5195 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5196
5197 // TODO: Support this for vectors after legalize ops.
5198 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5199 // Canonicalize setgt X, Min --> setne X, Min
5200 if (C1 == MinVal)
5201 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5202
5203 // If we have setugt X, Max-1, turn it into seteq X, Max
5204 if (C1 == MaxVal-1)
5205 return DAG.getSetCC(dl, VT, N0,
5206 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5207 ISD::SETEQ);
5208 }
5209 }
5210
5211 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5212 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5213 if (C1.isZero())
5214 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5215 VT, N0, N1, Cond, DCI, dl))
5216 return CC;
5217
5218 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5219 // For example, when high 32-bits of i64 X are known clear:
5220 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5221 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5222 bool CmpZero = N1C->isZero();
5223 bool CmpNegOne = N1C->isAllOnes();
5224 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5225 // Match or(lo,shl(hi,bw/2)) pattern.
5226 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5227 unsigned EltBits = V.getScalarValueSizeInBits();
5228 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5229 return false;
5230 SDValue LHS = V.getOperand(0);
5231 SDValue RHS = V.getOperand(1);
5232 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5233 // Unshifted element must have zero upperbits.
5234 if (RHS.getOpcode() == ISD::SHL &&
5235 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5236 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5237 DAG.MaskedValueIsZero(LHS, HiBits)) {
5238 Lo = LHS;
5239 Hi = RHS.getOperand(0);
5240 return true;
5241 }
5242 if (LHS.getOpcode() == ISD::SHL &&
5243 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5244 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5245 DAG.MaskedValueIsZero(RHS, HiBits)) {
5246 Lo = RHS;
5247 Hi = LHS.getOperand(0);
5248 return true;
5249 }
5250 return false;
5251 };
5252
5253 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5254 unsigned EltBits = N0.getScalarValueSizeInBits();
5255 unsigned HalfBits = EltBits / 2;
5256 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5257 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5258 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5259 SDValue NewN0 =
5260 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5261 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5262 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5263 };
5264
5265 SDValue Lo, Hi;
5266 if (IsConcat(N0, Lo, Hi))
5267 return MergeConcat(Lo, Hi);
5268
5269 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5270 SDValue Lo0, Lo1, Hi0, Hi1;
5271 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5272 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5273 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5274 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5275 }
5276 }
5277 }
5278 }
5279
5280 // If we have "setcc X, C0", check to see if we can shrink the immediate
5281 // by changing cc.
5282 // TODO: Support this for vectors after legalize ops.
5283 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5284 // SETUGT X, SINTMAX -> SETLT X, 0
5285 // SETUGE X, SINTMIN -> SETLT X, 0
5286 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5287 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5288 return DAG.getSetCC(dl, VT, N0,
5289 DAG.getConstant(0, dl, N1.getValueType()),
5290 ISD::SETLT);
5291
5292 // SETULT X, SINTMIN -> SETGT X, -1
5293 // SETULE X, SINTMAX -> SETGT X, -1
5294 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5295 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5296 return DAG.getSetCC(dl, VT, N0,
5297 DAG.getAllOnesConstant(dl, N1.getValueType()),
5298 ISD::SETGT);
5299 }
5300 }
5301
5302 // Back to non-vector simplifications.
5303 // TODO: Can we do these for vector splats?
5304 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5305 const APInt &C1 = N1C->getAPIntValue();
5306 EVT ShValTy = N0.getValueType();
5307
5308 // Fold bit comparisons when we can. This will result in an
5309 // incorrect value when boolean false is negative one, unless
5310 // the bitsize is 1 in which case the false value is the same
5311 // in practice regardless of the representation.
5312 if ((VT.getSizeInBits() == 1 ||
5314 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5315 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5316 N0.getOpcode() == ISD::AND) {
5317 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5318 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5319 // Perform the xform if the AND RHS is a single bit.
5320 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5321 if (AndRHS->getAPIntValue().isPowerOf2() &&
5322 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5323 return DAG.getNode(
5324 ISD::TRUNCATE, dl, VT,
5325 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5326 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5327 }
5328 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5329 // (X & 8) == 8 --> (X & 8) >> 3
5330 // Perform the xform if C1 is a single bit.
5331 unsigned ShCt = C1.logBase2();
5332 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5333 return DAG.getNode(
5334 ISD::TRUNCATE, dl, VT,
5335 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5336 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5337 }
5338 }
5339 }
5340 }
5341
5342 if (C1.getSignificantBits() <= 64 &&
5344 // (X & -256) == 256 -> (X >> 8) == 1
5345 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5346 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5347 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5348 const APInt &AndRHSC = AndRHS->getAPIntValue();
5349 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5350 unsigned ShiftBits = AndRHSC.countr_zero();
5351 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5352 // If using an unsigned shift doesn't yield a legal compare
5353 // immediate, try using sra instead.
5354 APInt NewC = C1.lshr(ShiftBits);
5355 if (NewC.getSignificantBits() <= 64 &&
5357 APInt SignedC = C1.ashr(ShiftBits);
5358 if (SignedC.getSignificantBits() <= 64 &&
5360 SDValue Shift = DAG.getNode(
5361 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5362 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5363 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5364 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5365 }
5366 }
5367 SDValue Shift = DAG.getNode(
5368 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5369 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5370 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5371 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5372 }
5373 }
5374 }
5375 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5376 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5377 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5378 // X < 0x100000000 -> (X >> 32) < 1
5379 // X >= 0x100000000 -> (X >> 32) >= 1
5380 // X <= 0x0ffffffff -> (X >> 32) < 1
5381 // X > 0x0ffffffff -> (X >> 32) >= 1
5382 unsigned ShiftBits;
5383 APInt NewC = C1;
5384 ISD::CondCode NewCond = Cond;
5385 if (AdjOne) {
5386 ShiftBits = C1.countr_one();
5387 NewC = NewC + 1;
5388 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5389 } else {
5390 ShiftBits = C1.countr_zero();
5391 }
5392 NewC.lshrInPlace(ShiftBits);
5393 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5395 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5396 SDValue Shift =
5397 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5398 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5399 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5400 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5401 }
5402 }
5403 }
5404 }
5405
5407 auto *CFP = cast<ConstantFPSDNode>(N1);
5408 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5409
5410 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5411 // constant if knowing that the operand is non-nan is enough. We prefer to
5412 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5413 // materialize 0.0.
5414 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5415 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5416
5417 // setcc (fneg x), C -> setcc swap(pred) x, -C
5418 if (N0.getOpcode() == ISD::FNEG) {
5420 if (DCI.isBeforeLegalizeOps() ||
5421 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5422 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5423 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5424 }
5425 }
5426
5427 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5429 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5430 bool IsFabs = N0.getOpcode() == ISD::FABS;
5431 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5432 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5433 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5434 : (IsFabs ? fcInf : fcPosInf);
5435 if (Cond == ISD::SETUEQ)
5436 Flag |= fcNan;
5437 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5438 DAG.getTargetConstant(Flag, dl, MVT::i32));
5439 }
5440 }
5441
5442 // If the condition is not legal, see if we can find an equivalent one
5443 // which is legal.
5445 // If the comparison was an awkward floating-point == or != and one of
5446 // the comparison operands is infinity or negative infinity, convert the
5447 // condition to a less-awkward <= or >=.
5448 if (CFP->getValueAPF().isInfinity()) {
5449 bool IsNegInf = CFP->getValueAPF().isNegative();
5451 switch (Cond) {
5452 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5453 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5454 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5455 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5456 default: break;
5457 }
5458 if (NewCond != ISD::SETCC_INVALID &&
5459 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5460 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5461 }
5462 }
5463 }
5464
5465 if (N0 == N1) {
5466 // The sext(setcc()) => setcc() optimization relies on the appropriate
5467 // constant being emitted.
5468 assert(!N0.getValueType().isInteger() &&
5469 "Integer types should be handled by FoldSetCC");
5470
5471 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5472 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5473 if (UOF == 2) // FP operators that are undefined on NaNs.
5474 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5475 if (UOF == unsigned(EqTrue))
5476 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5477 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5478 // if it is not already.
5479 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5480 if (NewCond != Cond &&
5481 (DCI.isBeforeLegalizeOps() ||
5482 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5483 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5484 }
5485
5486 // ~X > ~Y --> Y > X
5487 // ~X < ~Y --> Y < X
5488 // ~X < C --> X > ~C
5489 // ~X > C --> X < ~C
5490 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5491 N0.getValueType().isInteger()) {
5492 if (isBitwiseNot(N0)) {
5493 if (isBitwiseNot(N1))
5494 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5495
5498 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5499 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5500 }
5501 }
5502 }
5503
5504 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5505 N0.getValueType().isInteger()) {
5506 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5507 N0.getOpcode() == ISD::XOR) {
5508 // Simplify (X+Y) == (X+Z) --> Y == Z
5509 if (N0.getOpcode() == N1.getOpcode()) {
5510 if (N0.getOperand(0) == N1.getOperand(0))
5511 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5512 if (N0.getOperand(1) == N1.getOperand(1))
5513 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5514 if (isCommutativeBinOp(N0.getOpcode())) {
5515 // If X op Y == Y op X, try other combinations.
5516 if (N0.getOperand(0) == N1.getOperand(1))
5517 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5518 Cond);
5519 if (N0.getOperand(1) == N1.getOperand(0))
5520 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5521 Cond);
5522 }
5523 }
5524
5525 // If RHS is a legal immediate value for a compare instruction, we need
5526 // to be careful about increasing register pressure needlessly.
5527 bool LegalRHSImm = false;
5528
5529 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5530 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5531 // Turn (X+C1) == C2 --> X == C2-C1
5532 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5533 return DAG.getSetCC(
5534 dl, VT, N0.getOperand(0),
5535 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5536 dl, N0.getValueType()),
5537 Cond);
5538
5539 // Turn (X^C1) == C2 --> X == C1^C2
5540 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5541 return DAG.getSetCC(
5542 dl, VT, N0.getOperand(0),
5543 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5544 dl, N0.getValueType()),
5545 Cond);
5546 }
5547
5548 // Turn (C1-X) == C2 --> X == C1-C2
5549 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5550 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5551 return DAG.getSetCC(
5552 dl, VT, N0.getOperand(1),
5553 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5554 dl, N0.getValueType()),
5555 Cond);
5556
5557 // Could RHSC fold directly into a compare?
5558 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5559 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5560 }
5561
5562 // (X+Y) == X --> Y == 0 and similar folds.
5563 // Don't do this if X is an immediate that can fold into a cmp
5564 // instruction and X+Y has other uses. It could be an induction variable
5565 // chain, and the transform would increase register pressure.
5566 if (!LegalRHSImm || N0.hasOneUse())
5567 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5568 return V;
5569 }
5570
5571 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5572 N1.getOpcode() == ISD::XOR)
5573 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5574 return V;
5575
5576 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5577 return V;
5578
5579 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5580 return V;
5581 }
5582
5583 // Fold remainder of division by a constant.
5584 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5585 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5586 // When division is cheap or optimizing for minimum size,
5587 // fall through to DIVREM creation by skipping this fold.
5588 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5589 if (N0.getOpcode() == ISD::UREM) {
5590 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5591 return Folded;
5592 } else if (N0.getOpcode() == ISD::SREM) {
5593 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5594 return Folded;
5595 }
5596 }
5597 }
5598
5599 // Fold away ALL boolean setcc's.
5600 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5601 SDValue Temp;
5602 switch (Cond) {
5603 default: llvm_unreachable("Unknown integer setcc!");
5604 case ISD::SETEQ: // X == Y -> ~(X^Y)
5605 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5606 N0 = DAG.getNOT(dl, Temp, OpVT);
5607 if (!DCI.isCalledByLegalizer())
5608 DCI.AddToWorklist(Temp.getNode());
5609 break;
5610 case ISD::SETNE: // X != Y --> (X^Y)
5611 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5612 break;
5613 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5614 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5615 Temp = DAG.getNOT(dl, N0, OpVT);
5616 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5617 if (!DCI.isCalledByLegalizer())
5618 DCI.AddToWorklist(Temp.getNode());
5619 break;
5620 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5621 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5622 Temp = DAG.getNOT(dl, N1, OpVT);
5623 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5624 if (!DCI.isCalledByLegalizer())
5625 DCI.AddToWorklist(Temp.getNode());
5626 break;
5627 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5628 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5629 Temp = DAG.getNOT(dl, N0, OpVT);
5630 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5631 if (!DCI.isCalledByLegalizer())
5632 DCI.AddToWorklist(Temp.getNode());
5633 break;
5634 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5635 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5636 Temp = DAG.getNOT(dl, N1, OpVT);
5637 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5638 break;
5639 }
5640 if (VT.getScalarType() != MVT::i1) {
5641 if (!DCI.isCalledByLegalizer())
5642 DCI.AddToWorklist(N0.getNode());
5643 // FIXME: If running after legalize, we probably can't do this.
5645 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5646 }
5647 return N0;
5648 }
5649
5650 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5651 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5652 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5654 N1->getFlags().hasNoUnsignedWrap()) ||
5656 N1->getFlags().hasNoSignedWrap())) &&
5658 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5659 }
5660
5661 // Could not fold it.
5662 return SDValue();
5663}
5664
5665/// Returns true (and the GlobalValue and the offset) if the node is a
5666/// GlobalAddress + offset.
5668 int64_t &Offset) const {
5669
5670 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5671
5672 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5673 GA = GASD->getGlobal();
5674 Offset += GASD->getOffset();
5675 return true;
5676 }
5677
5678 if (N->getOpcode() == ISD::ADD) {
5679 SDValue N1 = N->getOperand(0);
5680 SDValue N2 = N->getOperand(1);
5681 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5682 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5683 Offset += V->getSExtValue();
5684 return true;
5685 }
5686 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5687 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5688 Offset += V->getSExtValue();
5689 return true;
5690 }
5691 }
5692 }
5693
5694 return false;
5695}
5696
5698 DAGCombinerInfo &DCI) const {
5699 // Default implementation: no optimization.
5700 return SDValue();
5701}
5702
5703//===----------------------------------------------------------------------===//
5704// Inline Assembler Implementation Methods
5705//===----------------------------------------------------------------------===//
5706
5709 unsigned S = Constraint.size();
5710
5711 if (S == 1) {
5712 switch (Constraint[0]) {
5713 default: break;
5714 case 'r':
5715 return C_RegisterClass;
5716 case 'm': // memory
5717 case 'o': // offsetable
5718 case 'V': // not offsetable
5719 return C_Memory;
5720 case 'p': // Address.
5721 return C_Address;
5722 case 'n': // Simple Integer
5723 case 'E': // Floating Point Constant
5724 case 'F': // Floating Point Constant
5725 return C_Immediate;
5726 case 'i': // Simple Integer or Relocatable Constant
5727 case 's': // Relocatable Constant
5728 case 'X': // Allow ANY value.
5729 case 'I': // Target registers.
5730 case 'J':
5731 case 'K':
5732 case 'L':
5733 case 'M':
5734 case 'N':
5735 case 'O':
5736 case 'P':
5737 case '<':
5738 case '>':
5739 return C_Other;
5740 }
5741 }
5742
5743 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5744 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5745 return C_Memory;
5746 return C_Register;
5747 }
5748 return C_Unknown;
5749}
5750
5751/// Try to replace an X constraint, which matches anything, with another that
5752/// has more specific requirements based on the type of the corresponding
5753/// operand.
5754const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5755 if (ConstraintVT.isInteger())
5756 return "r";
5757 if (ConstraintVT.isFloatingPoint())
5758 return "f"; // works for many targets
5759 return nullptr;
5760}
5761
5763 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5764 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5765 return SDValue();
5766}
5767
5768/// Lower the specified operand into the Ops vector.
5769/// If it is invalid, don't add anything to Ops.
5771 StringRef Constraint,
5772 std::vector<SDValue> &Ops,
5773 SelectionDAG &DAG) const {
5774
5775 if (Constraint.size() > 1)
5776 return;
5777
5778 char ConstraintLetter = Constraint[0];
5779 switch (ConstraintLetter) {
5780 default: break;
5781 case 'X': // Allows any operand
5782 case 'i': // Simple Integer or Relocatable Constant
5783 case 'n': // Simple Integer
5784 case 's': { // Relocatable Constant
5785
5787 uint64_t Offset = 0;
5788
5789 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5790 // etc., since getelementpointer is variadic. We can't use
5791 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5792 // while in this case the GA may be furthest from the root node which is
5793 // likely an ISD::ADD.
5794 while (true) {
5795 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5796 // gcc prints these as sign extended. Sign extend value to 64 bits
5797 // now; without this it would get ZExt'd later in
5798 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5799 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5800 BooleanContent BCont = getBooleanContents(MVT::i64);
5801 ISD::NodeType ExtOpc =
5802 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5803 int64_t ExtVal =
5804 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5805 Ops.push_back(
5806 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5807 return;
5808 }
5809 if (ConstraintLetter != 'n') {
5810 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5811 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5812 GA->getValueType(0),
5813 Offset + GA->getOffset()));
5814 return;
5815 }
5816 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5817 Ops.push_back(DAG.getTargetBlockAddress(
5818 BA->getBlockAddress(), BA->getValueType(0),
5819 Offset + BA->getOffset(), BA->getTargetFlags()));
5820 return;
5821 }
5823 Ops.push_back(Op);
5824 return;
5825 }
5826 }
5827 const unsigned OpCode = Op.getOpcode();
5828 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5829 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5830 Op = Op.getOperand(1);
5831 // Subtraction is not commutative.
5832 else if (OpCode == ISD::ADD &&
5833 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5834 Op = Op.getOperand(0);
5835 else
5836 return;
5837 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5838 continue;
5839 }
5840 return;
5841 }
5842 break;
5843 }
5844 }
5845}
5846
5850
5851std::pair<unsigned, const TargetRegisterClass *>
5853 StringRef Constraint,
5854 MVT VT) const {
5855 if (!Constraint.starts_with("{"))
5856 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5857 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5858
5859 // Remove the braces from around the name.
5860 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5861
5862 std::pair<unsigned, const TargetRegisterClass *> R =
5863 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5864
5865 // Figure out which register class contains this reg.
5866 for (const TargetRegisterClass *RC : RI->regclasses()) {
5867 // If none of the value types for this register class are valid, we
5868 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5869 if (!isLegalRC(*RI, *RC))
5870 continue;
5871
5872 for (const MCPhysReg &PR : *RC) {
5873 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5874 std::pair<unsigned, const TargetRegisterClass *> S =
5875 std::make_pair(PR, RC);
5876
5877 // If this register class has the requested value type, return it,
5878 // otherwise keep searching and return the first class found
5879 // if no other is found which explicitly has the requested type.
5880 if (RI->isTypeLegalForClass(*RC, VT))
5881 return S;
5882 if (!R.second)
5883 R = S;
5884 }
5885 }
5886 }
5887
5888 return R;
5889}
5890
5891//===----------------------------------------------------------------------===//
5892// Constraint Selection.
5893
5894/// Return true of this is an input operand that is a matching constraint like
5895/// "4".
5897 assert(!ConstraintCode.empty() && "No known constraint!");
5898 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5899}
5900
5901/// If this is an input matching constraint, this method returns the output
5902/// operand it matches.
5904 assert(!ConstraintCode.empty() && "No known constraint!");
5905 return atoi(ConstraintCode.c_str());
5906}
5907
5908/// Split up the constraint string from the inline assembly value into the
5909/// specific constraints and their prefixes, and also tie in the associated
5910/// operand values.
5911/// If this returns an empty vector, and if the constraint string itself
5912/// isn't empty, there was an error parsing.
5915 const TargetRegisterInfo *TRI,
5916 const CallBase &Call) const {
5917 /// Information about all of the constraints.
5918 AsmOperandInfoVector ConstraintOperands;
5919 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5920 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5921
5922 // Do a prepass over the constraints, canonicalizing them, and building up the
5923 // ConstraintOperands list.
5924 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5925 unsigned ResNo = 0; // ResNo - The result number of the next output.
5926 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5927
5928 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5929 ConstraintOperands.emplace_back(std::move(CI));
5930 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5931
5932 // Update multiple alternative constraint count.
5933 if (OpInfo.multipleAlternatives.size() > maCount)
5934 maCount = OpInfo.multipleAlternatives.size();
5935
5936 OpInfo.ConstraintVT = MVT::Other;
5937
5938 // Compute the value type for each operand.
5939 switch (OpInfo.Type) {
5941 // Indirect outputs just consume an argument.
5942 if (OpInfo.isIndirect) {
5943 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5944 break;
5945 }
5946
5947 // The return value of the call is this value. As such, there is no
5948 // corresponding argument.
5949 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5950 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5951 OpInfo.ConstraintVT =
5952 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5953 .getSimpleVT();
5954 } else {
5955 assert(ResNo == 0 && "Asm only has one result!");
5956 OpInfo.ConstraintVT =
5958 }
5959 ++ResNo;
5960 break;
5961 case InlineAsm::isInput:
5962 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5963 break;
5964 case InlineAsm::isLabel:
5965 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5966 ++LabelNo;
5967 continue;
5969 // Nothing to do.
5970 break;
5971 }
5972
5973 if (OpInfo.CallOperandVal) {
5974 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5975 if (OpInfo.isIndirect) {
5976 OpTy = Call.getParamElementType(ArgNo);
5977 assert(OpTy && "Indirect operand must have elementtype attribute");
5978 }
5979
5980 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5981 if (StructType *STy = dyn_cast<StructType>(OpTy))
5982 if (STy->getNumElements() == 1)
5983 OpTy = STy->getElementType(0);
5984
5985 // If OpTy is not a single value, it may be a struct/union that we
5986 // can tile with integers.
5987 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5988 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5989 switch (BitSize) {
5990 default: break;
5991 case 1:
5992 case 8:
5993 case 16:
5994 case 32:
5995 case 64:
5996 case 128:
5997 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5998 break;
5999 }
6000 }
6001
6002 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6003 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6004 ArgNo++;
6005 }
6006 }
6007
6008 // If we have multiple alternative constraints, select the best alternative.
6009 if (!ConstraintOperands.empty()) {
6010 if (maCount) {
6011 unsigned bestMAIndex = 0;
6012 int bestWeight = -1;
6013 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6014 int weight = -1;
6015 unsigned maIndex;
6016 // Compute the sums of the weights for each alternative, keeping track
6017 // of the best (highest weight) one so far.
6018 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6019 int weightSum = 0;
6020 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6021 cIndex != eIndex; ++cIndex) {
6022 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6023 if (OpInfo.Type == InlineAsm::isClobber)
6024 continue;
6025
6026 // If this is an output operand with a matching input operand,
6027 // look up the matching input. If their types mismatch, e.g. one
6028 // is an integer, the other is floating point, or their sizes are
6029 // different, flag it as an maCantMatch.
6030 if (OpInfo.hasMatchingInput()) {
6031 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6032 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6033 if ((OpInfo.ConstraintVT.isInteger() !=
6034 Input.ConstraintVT.isInteger()) ||
6035 (OpInfo.ConstraintVT.getSizeInBits() !=
6036 Input.ConstraintVT.getSizeInBits())) {
6037 weightSum = -1; // Can't match.
6038 break;
6039 }
6040 }
6041 }
6042 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6043 if (weight == -1) {
6044 weightSum = -1;
6045 break;
6046 }
6047 weightSum += weight;
6048 }
6049 // Update best.
6050 if (weightSum > bestWeight) {
6051 bestWeight = weightSum;
6052 bestMAIndex = maIndex;
6053 }
6054 }
6055
6056 // Now select chosen alternative in each constraint.
6057 for (AsmOperandInfo &cInfo : ConstraintOperands)
6058 if (cInfo.Type != InlineAsm::isClobber)
6059 cInfo.selectAlternative(bestMAIndex);
6060 }
6061 }
6062
6063 // Check and hook up tied operands, choose constraint code to use.
6064 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6065 cIndex != eIndex; ++cIndex) {
6066 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6067
6068 // If this is an output operand with a matching input operand, look up the
6069 // matching input. If their types mismatch, e.g. one is an integer, the
6070 // other is floating point, or their sizes are different, flag it as an
6071 // error.
6072 if (OpInfo.hasMatchingInput()) {
6073 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6074
6075 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6076 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6077 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6078 OpInfo.ConstraintVT);
6079 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6080 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6081 Input.ConstraintVT);
6082 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6083 OpInfo.ConstraintVT.isFloatingPoint();
6084 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6085 Input.ConstraintVT.isFloatingPoint();
6086 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6087 (MatchRC.second != InputRC.second)) {
6088 report_fatal_error("Unsupported asm: input constraint"
6089 " with a matching output constraint of"
6090 " incompatible type!");
6091 }
6092 }
6093 }
6094 }
6095
6096 return ConstraintOperands;
6097}
6098
6099/// Return a number indicating our preference for choosing a type of constraint
6100/// over another, for the purpose of sorting them. Immediates are almost always
6101/// preferable (when they can be emitted). A higher return value means a
6102/// stronger preference for one constraint type relative to another.
6103/// FIXME: We should prefer registers over memory but doing so may lead to
6104/// unrecoverable register exhaustion later.
6105/// https://github.com/llvm/llvm-project/issues/20571
6107 switch (CT) {
6110 return 4;
6113 return 3;
6115 return 2;
6117 return 1;
6119 return 0;
6120 }
6121 llvm_unreachable("Invalid constraint type");
6122}
6123
6124/// Examine constraint type and operand type and determine a weight value.
6125/// This object must already have been set up with the operand type
6126/// and the current alternative constraint selected.
6129 AsmOperandInfo &info, int maIndex) const {
6131 if (maIndex >= (int)info.multipleAlternatives.size())
6132 rCodes = &info.Codes;
6133 else
6134 rCodes = &info.multipleAlternatives[maIndex].Codes;
6135 ConstraintWeight BestWeight = CW_Invalid;
6136
6137 // Loop over the options, keeping track of the most general one.
6138 for (const std::string &rCode : *rCodes) {
6139 ConstraintWeight weight =
6140 getSingleConstraintMatchWeight(info, rCode.c_str());
6141 if (weight > BestWeight)
6142 BestWeight = weight;
6143 }
6144
6145 return BestWeight;
6146}
6147
6148/// Examine constraint type and operand type and determine a weight value.
6149/// This object must already have been set up with the operand type
6150/// and the current alternative constraint selected.
6153 AsmOperandInfo &info, const char *constraint) const {
6155 Value *CallOperandVal = info.CallOperandVal;
6156 // If we don't have a value, we can't do a match,
6157 // but allow it at the lowest weight.
6158 if (!CallOperandVal)
6159 return CW_Default;
6160 // Look at the constraint type.
6161 switch (*constraint) {
6162 case 'i': // immediate integer.
6163 case 'n': // immediate integer with a known value.
6164 if (isa<ConstantInt>(CallOperandVal))
6165 weight = CW_Constant;
6166 break;
6167 case 's': // non-explicit intregal immediate.
6168 if (isa<GlobalValue>(CallOperandVal))
6169 weight = CW_Constant;
6170 break;
6171 case 'E': // immediate float if host format.
6172 case 'F': // immediate float.
6173 if (isa<ConstantFP>(CallOperandVal))
6174 weight = CW_Constant;
6175 break;
6176 case '<': // memory operand with autodecrement.
6177 case '>': // memory operand with autoincrement.
6178 case 'm': // memory operand.
6179 case 'o': // offsettable memory operand
6180 case 'V': // non-offsettable memory operand
6181 weight = CW_Memory;
6182 break;
6183 case 'r': // general register.
6184 case 'g': // general register, memory operand or immediate integer.
6185 // note: Clang converts "g" to "imr".
6186 if (CallOperandVal->getType()->isIntegerTy())
6187 weight = CW_Register;
6188 break;
6189 case 'X': // any operand.
6190 default:
6191 weight = CW_Default;
6192 break;
6193 }
6194 return weight;
6195}
6196
6197/// If there are multiple different constraints that we could pick for this
6198/// operand (e.g. "imr") try to pick the 'best' one.
6199/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6200/// into seven classes:
6201/// Register -> one specific register
6202/// RegisterClass -> a group of regs
6203/// Memory -> memory
6204/// Address -> a symbolic memory reference
6205/// Immediate -> immediate values
6206/// Other -> magic values (such as "Flag Output Operands")
6207/// Unknown -> something we don't recognize yet and can't handle
6208/// Ideally, we would pick the most specific constraint possible: if we have
6209/// something that fits into a register, we would pick it. The problem here
6210/// is that if we have something that could either be in a register or in
6211/// memory that use of the register could cause selection of *other*
6212/// operands to fail: they might only succeed if we pick memory. Because of
6213/// this the heuristic we use is:
6214///
6215/// 1) If there is an 'other' constraint, and if the operand is valid for
6216/// that constraint, use it. This makes us take advantage of 'i'
6217/// constraints when available.
6218/// 2) Otherwise, pick the most general constraint present. This prefers
6219/// 'm' over 'r', for example.
6220///
6222 TargetLowering::AsmOperandInfo &OpInfo) const {
6223 ConstraintGroup Ret;
6224
6225 Ret.reserve(OpInfo.Codes.size());
6226 for (StringRef Code : OpInfo.Codes) {
6228
6229 // Indirect 'other' or 'immediate' constraints are not allowed.
6230 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6231 CType == TargetLowering::C_Register ||
6233 continue;
6234
6235 // Things with matching constraints can only be registers, per gcc
6236 // documentation. This mainly affects "g" constraints.
6237 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6238 continue;
6239
6240 Ret.emplace_back(Code, CType);
6241 }
6242
6244 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6245 });
6246
6247 return Ret;
6248}
6249
6250/// If we have an immediate, see if we can lower it. Return true if we can,
6251/// false otherwise.
6253 SDValue Op, SelectionDAG *DAG,
6254 const TargetLowering &TLI) {
6255
6256 assert((P.second == TargetLowering::C_Other ||
6257 P.second == TargetLowering::C_Immediate) &&
6258 "need immediate or other");
6259
6260 if (!Op.getNode())
6261 return false;
6262
6263 std::vector<SDValue> ResultOps;
6264 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6265 return !ResultOps.empty();
6266}
6267
6268/// Determines the constraint code and constraint type to use for the specific
6269/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
// NOTE(review): the line carrying the function name and first parameter is
// missing from this extract; only the trailing parameters are visible.
 6271 SDValue Op,
 6272 SelectionDAG *DAG) const {
 6273 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
 6274
 6275 // Single-letter constraints ('r') are very common.
 6276 if (OpInfo.Codes.size() == 1) {
 6277 OpInfo.ConstraintCode = OpInfo.Codes[0];
 6278 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
 6279 } else {
// NOTE(review): the declaration of G is missing from this extract —
// presumably the list of (code, type) preferences for OpInfo; confirm
// upstream.
 6281 if (G.empty())
 6282 return;
 6283
// Walk the leading run of C_Other/C_Immediate entries, stopping at the
// first one the target can actually lower as an immediate. If none can be
// lowered and we run off the end of that run, fall back to entry 0.
 6284 unsigned BestIdx = 0;
 6285 for (const unsigned E = G.size();
 6286 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
 6287 G[BestIdx].second == TargetLowering::C_Immediate);
 6288 ++BestIdx) {
 6289 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
 6290 break;
 6291 // If we're out of constraints, just pick the first one.
 6292 if (BestIdx + 1 == E) {
 6293 BestIdx = 0;
 6294 break;
 6295 }
 6296 }
 6297
 6298 OpInfo.ConstraintCode = G[BestIdx].first;
 6299 OpInfo.ConstraintType = G[BestIdx].second;
 6300 }
 6301
 6302 // 'X' matches anything.
 6303 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
 6304 // Constants are handled elsewhere. For Functions, the type here is the
 6305 // type of the result, which is not what we want to look at; leave them
 6306 // alone.
 6307 Value *v = OpInfo.CallOperandVal;
 6308 if (isa<ConstantInt>(v) || isa<Function>(v)) {
 6309 return;
 6310 }
 6311
// Labels and block addresses can only ever be immediates.
 6312 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
 6313 OpInfo.ConstraintCode = "i";
 6314 return;
 6315 }
 6316
 6317 // Otherwise, try to resolve it to something we know about by looking at
 6318 // the actual operand type.
 6319 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
 6320 OpInfo.ConstraintCode = Repl;
 6321 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
 6322 }
 6323 }
 6324}
6325
6326/// Given an exact SDIV by a constant, create a multiplication
6327/// with the multiplicative inverse of the constant.
6328/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
// NOTE(review): the opening signature line is missing from this extract —
// presumably `static SDValue BuildExactSDIV(const TargetLowering &TLI,
// SDNode *N, ...)`; confirm upstream.
 6330 const SDLoc &dl, SelectionDAG &DAG,
 6331 SmallVectorImpl<SDNode *> &Created) {
 6332 SDValue Op0 = N->getOperand(0);
 6333 SDValue Op1 = N->getOperand(1);
 6334 EVT VT = N->getValueType(0);
 6335 EVT SVT = VT.getScalarType();
 6336 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
 6337 EVT ShSVT = ShVT.getScalarType();
 6338
 6339 bool UseSRA = false;
 6340 SmallVector<SDValue, 16> Shifts, Factors;
 6341
// Per-element pattern: split each divisor into (odd part) << (trailing
// zeros). An *exact* divide by the power-of-two part is an exact SRA; the
// remaining odd part is inverted modulo 2^BW below.
 6342 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
 6343 if (C->isZero())
 6344 return false;
 6345 APInt Divisor = C->getAPIntValue();
 6346 unsigned Shift = Divisor.countr_zero();
 6347 if (Shift) {
 6348 Divisor.ashrInPlace(Shift);
 6349 UseSRA = true;
 6350 }
// Divisor is odd here, so a multiplicative inverse mod 2^BW exists.
 6351 APInt Factor = Divisor.multiplicativeInverse();
 6352 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
 6353 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
 6354 return true;
 6355 };
 6356
 6357 // Collect all magic values from the build vector.
 6358 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
 6359 return SDValue();
 6360
// Materialize the per-lane constants in whatever form matches Op1:
// fixed-width build_vector, scalable splat, or plain scalar.
 6361 SDValue Shift, Factor;
 6362 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
 6363 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6364 Factor = DAG.getBuildVector(VT, dl, Factors);
 6365 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
 6366 assert(Shifts.size() == 1 && Factors.size() == 1 &&
 6367 "Expected matchUnaryPredicate to return one element for scalable "
 6368 "vectors");
 6369 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6370 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6371 } else {
 6372 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
 6373 Shift = Shifts[0];
 6374 Factor = Factors[0];
 6375 }
 6376
// Emit: (mul (sra exact Op0, Shift), Factor) — SRA only if some lane had
// an even divisor.
 6377 SDValue Res = Op0;
 6378 if (UseSRA) {
 6379 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
 6380 Created.push_back(Res.getNode());
 6381 }
 6382
 6383 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
 6384}
6385
6386/// Given an exact UDIV by a constant, create a multiplication
6387/// with the multiplicative inverse of the constant.
6388/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
// NOTE(review): the opening signature line is missing from this extract —
// presumably `static SDValue BuildExactUDIV(const TargetLowering &TLI,
// SDNode *N, ...)`; confirm upstream. Unsigned twin of BuildExactSDIV:
// logical (SRL) instead of arithmetic shift for the power-of-two part.
 6390 const SDLoc &dl, SelectionDAG &DAG,
 6391 SmallVectorImpl<SDNode *> &Created) {
 6392 EVT VT = N->getValueType(0);
 6393 EVT SVT = VT.getScalarType();
 6394 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
 6395 EVT ShSVT = ShVT.getScalarType();
 6396
 6397 bool UseSRL = false;
 6398 SmallVector<SDValue, 16> Shifts, Factors;
 6399
// Per-element: divisor = (odd part) << (trailing zeros); exact udiv by the
// power-of-two part is an exact SRL.
 6400 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
 6401 if (C->isZero())
 6402 return false;
 6403 APInt Divisor = C->getAPIntValue();
 6404 unsigned Shift = Divisor.countr_zero();
 6405 if (Shift) {
 6406 Divisor.lshrInPlace(Shift);
 6407 UseSRL = true;
 6408 }
 6409 // Calculate the multiplicative inverse modulo BW.
 6410 APInt Factor = Divisor.multiplicativeInverse();
 6411 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
 6412 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
 6413 return true;
 6414 };
 6415
 6416 SDValue Op1 = N->getOperand(1);
 6417
 6418 // Collect all magic values from the build vector.
 6419 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
 6420 return SDValue();
 6421
// Materialize the per-lane constants in the same shape as Op1.
 6422 SDValue Shift, Factor;
 6423 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
 6424 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6425 Factor = DAG.getBuildVector(VT, dl, Factors);
 6426 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
 6427 assert(Shifts.size() == 1 && Factors.size() == 1 &&
 6428 "Expected matchUnaryPredicate to return one element for scalable "
 6429 "vectors");
 6430 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6431 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6432 } else {
 6433 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
 6434 Shift = Shifts[0];
 6435 Factor = Factors[0];
 6436 }
 6437
// Emit: (mul (srl exact N0, Shift), Factor).
 6438 SDValue Res = N->getOperand(0);
 6439 if (UseSRL) {
 6440 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
 6441 Created.push_back(Res.getNode());
 6442 }
 6443
 6444 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
 6445}
6446
// NOTE(review): the signature line is missing from this extract — from the
// "Lower SDIV as SDIV" comment below this is presumably the default
// TargetLowering::BuildSDIVPow2 hook; confirm upstream. Default behavior:
// if integer division is cheap on this target, keep the node as-is
// (returning the node itself signals "don't transform"); otherwise return
// a null SDValue so the combiner tries other expansions.
 6448 SelectionDAG &DAG,
 6449 SmallVectorImpl<SDNode *> &Created) const {
 6450 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
 6451 if (isIntDivCheap(N->getValueType(0), Attr))
 6452 return SDValue(N, 0); // Lower SDIV as SDIV
 6453 return SDValue();
 6454}
6455
// NOTE(review): the line carrying the function name is missing from this
// extract — from the "Lower SREM as SREM" comment below this is presumably
// the default TargetLowering::BuildSREMPow2 hook; confirm upstream. Same
// shape as the SDIV hook above: keep the node when division is cheap, else
// return null to request expansion.
6456SDValue
 6458 SelectionDAG &DAG,
 6459 SmallVectorImpl<SDNode *> &Created) const {
 6460 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
 6461 if (isIntDivCheap(N->getValueType(0), Attr))
 6462 return SDValue(N, 0); // Lower SREM as SREM
 6463 return SDValue();
 6464}
6465
6466/// Build sdiv by power-of-2 with conditional move instructions
6467/// Ref: "Hacker's Delight" by Henry Warren 10-1
6468/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6469/// bgez x, label
6470/// add x, x, 2**k-1
6471/// label:
6472/// sra res, x, k
6473/// neg res, res (when the divisor is negative)
// NOTE(review): the line with the function name is missing from this
// extract — presumably TargetLowering::buildSDIVPow2WithCMov; confirm
// upstream. Divisor's magnitude must be 2^Lg2 (its trailing-zero count).
 6475 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
 6476 SmallVectorImpl<SDNode *> &Created) const {
 6477 unsigned Lg2 = Divisor.countr_zero();
 6478 EVT VT = N->getValueType(0);
 6479
 6480 SDLoc DL(N);
 6481 SDValue N0 = N->getOperand(0);
 6482 SDValue Zero = DAG.getConstant(0, DL, VT);
// Lg2Mask = 2^Lg2 - 1, the rounding bias for negative dividends.
 6483 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
 6484 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
 6485
 6486 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
 6487 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 6488 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
 6489 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
 6490 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
 6491
 6492 Created.push_back(Cmp.getNode());
 6493 Created.push_back(Add.getNode());
 6494 Created.push_back(CMov.getNode());
 6495
 6496 // Divide by pow2.
 6497 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
 6498 DAG.getShiftAmountConstant(Lg2, VT, DL));
 6499
 6500 // If we're dividing by a positive value, we're done. Otherwise, we must
 6501 // negate the result.
 6502 if (Divisor.isNonNegative())
 6503 return SRA;
 6504
 6505 Created.push_back(SRA.getNode());
 6506 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
 6507}
6508
6509/// Given an ISD::SDIV node expressing a divide by constant,
6510/// return a DAG expression to select that will generate the same value by
6511/// multiplying by a magic number.
6512/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): the opening signature line (presumably
// `SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,`) is
// missing from this extract; confirm upstream.
 6514 bool IsAfterLegalization,
 6515 bool IsAfterLegalTypes,
 6516 SmallVectorImpl<SDNode *> &Created) const {
 6517 SDLoc dl(N);
 6518 EVT VT = N->getValueType(0);
 6519 EVT SVT = VT.getScalarType();
 6520 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6521 EVT ShSVT = ShVT.getScalarType();
 6522 unsigned EltBits = VT.getScalarSizeInBits();
 6523 EVT MulVT;
 6524
 6525 // Check to see if we can do this.
 6526 // FIXME: We should be more aggressive here.
 6527 if (!isTypeLegal(VT)) {
 6528 // Limit this to simple scalars for now.
 6529 if (VT.isVector() || !VT.isSimple())
 6530 return SDValue();
 6531
 6532 // If this type will be promoted to a large enough type with a legal
 6533 // multiply operation, we can go ahead and do this transform.
// NOTE(review): the guard condition on the line above this early return is
// missing from this extract (presumably a type-action check for integer
// promotion); confirm upstream.
 6535 return SDValue();
 6536
 6537 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
 6538 if (MulVT.getSizeInBits() < (2 * EltBits) ||
 6539 !isOperationLegal(ISD::MUL, MulVT))
 6540 return SDValue();
 6541 }
 6542
 6543 // If the sdiv has an 'exact' bit we can use a simpler lowering.
 6544 if (N->getFlags().hasExact())
 6545 return BuildExactSDIV(*this, N, dl, DAG, Created);
 6546
 6547 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
 6548
// Per-lane: compute the signed magic constants for each divisor.
 6549 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
 6550 if (C->isZero())
 6551 return false;
 6552
 6553 const APInt &Divisor = C->getAPIntValue();
// NOTE(review): the declaration of `magics` (presumably the signed
// magic-number computation for Divisor) is missing from this extract;
// confirm upstream.
 6555 int NumeratorFactor = 0;
 6556 int ShiftMask = -1;
 6557
 6558 if (Divisor.isOne() || Divisor.isAllOnes()) {
 6559 // If d is +1/-1, we just multiply the numerator by +1/-1.
 6560 NumeratorFactor = Divisor.getSExtValue();
 6561 magics.Magic = 0;
 6562 magics.ShiftAmount = 0;
 6563 ShiftMask = 0;
 6564 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
 6565 // If d > 0 and m < 0, add the numerator.
 6566 NumeratorFactor = 1;
 6567 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
 6568 // If d < 0 and m > 0, subtract the numerator.
 6569 NumeratorFactor = -1;
 6570 }
 6571
 6572 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
 6573 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
 6574 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
 6575 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
 6576 return true;
 6577 };
 6578
 6579 SDValue N0 = N->getOperand(0);
 6580 SDValue N1 = N->getOperand(1);
 6581
 6582 // Collect the shifts / magic values from each element.
 6583 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
 6584 return SDValue();
 6585
// Package the collected per-lane constants in the same shape as N1.
 6586 SDValue MagicFactor, Factor, Shift, ShiftMask;
 6587 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
 6588 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
 6589 Factor = DAG.getBuildVector(VT, dl, Factors);
 6590 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6591 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
 6592 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
 6593 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
 6594 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
 6595 "Expected matchUnaryPredicate to return one element for scalable "
 6596 "vectors");
 6597 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
 6598 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6599 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6600 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
 6601 } else {
 6602 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
 6603 MagicFactor = MagicFactors[0];
 6604 Factor = Factors[0];
 6605 Shift = Shifts[0];
 6606 ShiftMask = ShiftMasks[0];
 6607 }
 6608
 6609 // Multiply the numerator (operand 0) by the magic value.
 6610 // FIXME: We should support doing a MUL in a wider type.
// Helper: produce the high half of a signed X*Y, via MULHS, SMUL_LOHI, or
// a widened multiply + shift, whichever is available.
 6611 auto GetMULHS = [&](SDValue X, SDValue Y) {
 6612 // If the type isn't legal, use a wider mul of the type calculated
 6613 // earlier.
 6614 if (!isTypeLegal(VT)) {
 6615 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
 6616 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
 6617 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
 6618 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
 6619 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
 6620 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6621 }
 6622
 6623 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
 6624 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
 6625 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
 6626 SDValue LoHi =
 6627 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
 6628 return SDValue(LoHi.getNode(), 1);
 6629 }
 6630 // If type twice as wide legal, widen and use a mul plus a shift.
 6631 unsigned Size = VT.getScalarSizeInBits();
 6632 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
 6633 if (VT.isVector())
 6634 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
 6636 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
 6637 // custom lowered. This is very expensive so avoid it at all costs for
 6638 // constant divisors.
// NOTE(review): part of this condition (its continuation lines) is missing
// from this extract; confirm the full guard upstream.
 6639 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
 6642 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
 6643 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
 6644 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
 6645 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
 6646 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
 6647 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6648 }
 6649 return SDValue();
 6650 };
 6651
 6652 SDValue Q = GetMULHS(N0, MagicFactor);
 6653 if (!Q)
 6654 return SDValue();
 6655
 6656 Created.push_back(Q.getNode());
 6657
 6658 // (Optionally) Add/subtract the numerator using Factor.
 6659 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
 6660 Created.push_back(Factor.getNode());
 6661 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
 6662 Created.push_back(Q.getNode());
 6663
 6664 // Shift right algebraic by shift value.
 6665 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
 6666 Created.push_back(Q.getNode());
 6667
 6668 // Extract the sign bit, mask it and add it to the quotient.
 6669 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
 6670 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
 6671 Created.push_back(T.getNode());
 6672 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
 6673 Created.push_back(T.getNode());
 6674 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
 6675}
6676
6677/// Given an ISD::UDIV node expressing a divide by constant,
6678/// return a DAG expression to select that will generate the same value by
6679/// multiplying by a magic number.
6680/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): the opening signature line (presumably
// `SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,`) is
// missing from this extract; confirm upstream.
 6682 bool IsAfterLegalization,
 6683 bool IsAfterLegalTypes,
 6684 SmallVectorImpl<SDNode *> &Created) const {
 6685 SDLoc dl(N);
 6686 EVT VT = N->getValueType(0);
 6687 EVT SVT = VT.getScalarType();
 6688 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6689 EVT ShSVT = ShVT.getScalarType();
 6690 unsigned EltBits = VT.getScalarSizeInBits();
 6691 EVT MulVT;
 6692
 6693 // Check to see if we can do this.
 6694 // FIXME: We should be more aggressive here.
 6695 if (!isTypeLegal(VT)) {
 6696 // Limit this to simple scalars for now.
 6697 if (VT.isVector() || !VT.isSimple())
 6698 return SDValue();
 6699
 6700 // If this type will be promoted to a large enough type with a legal
 6701 // multiply operation, we can go ahead and do this transform.
// NOTE(review): the guard condition on the line above this early return is
// missing from this extract (presumably a type-action check for integer
// promotion); confirm upstream.
 6703 return SDValue();
 6704
 6705 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
 6706 if (MulVT.getSizeInBits() < (2 * EltBits) ||
 6707 !isOperationLegal(ISD::MUL, MulVT))
 6708 return SDValue();
 6709 }
 6710
 6711 // If the udiv has an 'exact' bit we can use a simpler lowering.
 6712 if (N->getFlags().hasExact())
 6713 return BuildExactUDIV(*this, N, dl, DAG, Created);
 6714
 6715 SDValue N0 = N->getOperand(0);
 6716 SDValue N1 = N->getOperand(1);
 6717
 6718 // Try to use leading zeros of the dividend to reduce the multiplier and
 6719 // avoid expensive fixups.
 6720 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
 6721
 6722 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
 6723 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
 6724
// Per-lane: compute the unsigned magic constants for each divisor.
 6725 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
 6726 if (C->isZero())
 6727 return false;
 6728 const APInt& Divisor = C->getAPIntValue();
 6729
 6730 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
 6731
 6732 // Magic algorithm doesn't work for division by 1. We need to emit a select
 6733 // at the end.
 6734 if (Divisor.isOne()) {
 6735 PreShift = PostShift = DAG.getUNDEF(ShSVT);
 6736 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
 6737 } else {
// NOTE(review): the line(s) declaring `magics` (presumably the unsigned
// magic-number computation seeded with the known leading zeros below) are
// missing from this extract; confirm upstream.
 6740 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
 6741
 6742 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
 6743
 6744 assert(magics.PreShift < Divisor.getBitWidth() &&
 6745 "We shouldn't generate an undefined shift!");
 6746 assert(magics.PostShift < Divisor.getBitWidth() &&
 6747 "We shouldn't generate an undefined shift!");
 6748 assert((!magics.IsAdd || magics.PreShift == 0) &&
 6749 "Unexpected pre-shift");
 6750 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
 6751 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
 6752 NPQFactor = DAG.getConstant(
 6753 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
 6754 : APInt::getZero(EltBits),
 6755 dl, SVT);
 6756 UseNPQ |= magics.IsAdd;
 6757 UsePreShift |= magics.PreShift != 0;
 6758 UsePostShift |= magics.PostShift != 0;
 6759 }
 6760
 6761 PreShifts.push_back(PreShift);
 6762 MagicFactors.push_back(MagicFactor);
 6763 NPQFactors.push_back(NPQFactor);
 6764 PostShifts.push_back(PostShift);
 6765 return true;
 6766 };
 6767
 6768 // Collect the shifts/magic values from each element.
 6769 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
 6770 return SDValue();
 6771
// Package the collected per-lane constants in the same shape as N1.
 6772 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
 6773 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
 6774 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
 6775 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
 6776 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
 6777 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
 6778 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
 6779 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
 6780 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
 6781 "Expected matchUnaryPredicate to return one for scalable vectors");
 6782 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
 6783 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
 6784 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
 6785 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
 6786 } else {
 6787 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
 6788 PreShift = PreShifts[0];
 6789 MagicFactor = MagicFactors[0];
 6790 PostShift = PostShifts[0];
 6791 }
 6792
 6793 SDValue Q = N0;
 6794 if (UsePreShift) {
 6795 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
 6796 Created.push_back(Q.getNode());
 6797 }
 6798
 6799 // FIXME: We should support doing a MUL in a wider type.
// Helper: produce the high half of an unsigned X*Y, via MULHU, UMUL_LOHI,
// or a widened multiply + shift, whichever is available.
 6800 auto GetMULHU = [&](SDValue X, SDValue Y) {
 6801 // If the type isn't legal, use a wider mul of the type calculated
 6802 // earlier.
 6803 if (!isTypeLegal(VT)) {
 6804 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
 6805 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
 6806 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
 6807 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
 6808 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
 6809 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6810 }
 6811
 6812 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
 6813 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
 6814 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
 6815 SDValue LoHi =
 6816 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
 6817 return SDValue(LoHi.getNode(), 1);
 6818 }
 6819 // If type twice as wide legal, widen and use a mul plus a shift.
 6820 unsigned Size = VT.getScalarSizeInBits();
 6821 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
 6822 if (VT.isVector())
 6823 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
 6825 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
 6826 // custom lowered. This is very expensive so avoid it at all costs for
 6827 // constant divisors.
// NOTE(review): part of this condition (its continuation lines) is missing
// from this extract; confirm the full guard upstream.
 6828 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
 6831 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
 6832 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
 6833 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
 6834 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
 6835 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
 6836 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6837 }
 6838 return SDValue(); // No mulhu or equivalent
 6839 };
 6840
 6841 // Multiply the numerator (operand 0) by the magic value.
 6842 Q = GetMULHU(Q, MagicFactor);
 6843 if (!Q)
 6844 return SDValue();
 6845
 6846 Created.push_back(Q.getNode());
 6847
// NPQ fixup path (magics.IsAdd): Q = ((N - Q) >> 1) + Q before the
// post-shift; vector lanes emulate the SRL-by-1 with a MULHU by 2^(W-1)
// or zero so NPQ and non-NPQ lanes can share one expression.
 6848 if (UseNPQ) {
 6849 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
 6850 Created.push_back(NPQ.getNode());
 6851
 6852 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
 6853 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
 6854 if (VT.isVector())
 6855 NPQ = GetMULHU(NPQ, NPQFactor);
 6856 else
 6857 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
 6858
 6859 Created.push_back(NPQ.getNode());
 6860
 6861 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
 6862 Created.push_back(Q.getNode());
 6863 }
 6864
 6865 if (UsePostShift) {
 6866 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
 6867 Created.push_back(Q.getNode());
 6868 }
 6869
 6870 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 6871
// Divisor-of-one lanes were given undef magic constants above; select the
// original numerator for them here.
 6872 SDValue One = DAG.getConstant(1, dl, VT);
 6873 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
 6874 return DAG.getSelect(dl, VT, IsOne, N0, Q);
 6875}
6876
6877/// If all values in Values that *don't* match the predicate are same 'splat'
6878/// value, then replace all values with that splat value.
6879/// Else, if AlternativeReplacement was provided, then replace all values that
6880/// do match predicate with AlternativeReplacement value.
6881static void
// NOTE(review): the line with the function's name and first parameter
// (presumably a mutable range of SDValues called Values) is missing from
// this extract; confirm upstream.
 6883 std::function<bool(SDValue)> Predicate,
 6884 SDValue AlternativeReplacement = SDValue()) {
 6885 SDValue Replacement;
 6886 // Is there a value for which the Predicate does *NOT* match? What is it?
 6887 auto SplatValue = llvm::find_if_not(Values, Predicate);
 6888 if (SplatValue != Values.end()) {
 6889 // Does Values consist only of SplatValue's and values matching Predicate?
 6890 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
 6891 return Value == *SplatValue || Predicate(Value);
 6892 })) // Then we shall replace values matching predicate with SplatValue.
 6893 Replacement = *SplatValue;
 6894 }
 6895 if (!Replacement) {
 6896 // Oops, we did not find the "baseline" splat value.
 6897 if (!AlternativeReplacement)
 6898 return; // Nothing to do.
 6899 // Let's replace with provided value then.
 6900 Replacement = AlternativeReplacement;
 6901 }
// In-place rewrite: every matching element becomes Replacement.
 6902 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
 6903}
6904
6905/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6906/// where the divisor is constant and the comparison target is zero,
6907/// return a DAG expression that will generate the same comparison result
6908/// using only multiplications, additions and shifts/rotations.
6909/// Ref: "Hacker's Delight" 10-17.
// Thin wrapper: delegate to prepareUREMEqFold and, on success, queue all
// nodes it created onto the DAG combiner worklist.
6910SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
 6911 SDValue CompTargetNode,
// NOTE(review): a parameter line (presumably the ISD::CondCode Cond) and
// the declaration of `Built` are missing from this extract; confirm
// upstream.
 6913 DAGCombinerInfo &DCI,
 6914 const SDLoc &DL) const {
 6916 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
 6917 DCI, DL, Built)) {
 6918 for (SDNode *N : Built)
 6919 DCI.AddToWorklist(N);
 6920 return Folded;
 6921 }
 6922
 6923 return SDValue();
 6924}
6925
6926SDValue
6927TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
 6928 SDValue CompTargetNode, ISD::CondCode Cond,
 6929 DAGCombinerInfo &DCI, const SDLoc &DL,
 6930 SmallVectorImpl<SDNode *> &Created) const {
 6931 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
 6932 // - D must be constant, with D = D0 * 2^K where D0 is odd
 6933 // - P is the multiplicative inverse of D0 modulo 2^W
 6934 // - Q = floor(((2^W) - 1) / D)
 6935 // where W is the width of the common type of N and D.
 6936 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 6937 "Only applicable for (in)equality comparisons.");
 6938
 6939 SelectionDAG &DAG = DCI.DAG;
 6940
 6941 EVT VT = REMNode.getValueType();
 6942 EVT SVT = VT.getScalarType();
 6943 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6944 EVT ShSVT = ShVT.getScalarType();
 6945
 6946 // If MUL is unavailable, we cannot proceed in any case.
 6947 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
 6948 return SDValue();
 6949
// Lane-classification flags accumulated by the per-lane matcher below;
// they decide whether the fold is profitable and what fixups are needed.
 6950 bool ComparingWithAllZeros = true;
 6951 bool AllComparisonsWithNonZerosAreTautological = true;
 6952 bool HadTautologicalLanes = false;
 6953 bool AllLanesAreTautological = true;
 6954 bool HadEvenDivisor = false;
 6955 bool AllDivisorsArePowerOfTwo = true;
 6956 bool HadTautologicalInvertedLanes = false;
 6957 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
 6958
 6959 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
 6960 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
 6961 if (CDiv->isZero())
 6962 return false;
 6963
 6964 const APInt &D = CDiv->getAPIntValue();
 6965 const APInt &Cmp = CCmp->getAPIntValue();
 6966
 6967 ComparingWithAllZeros &= Cmp.isZero();
 6968
 6969 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
 6970 // if C2 is not less than C1, the comparison is always false.
 6971 // But we will only be able to produce the comparison that will give the
 6972 // opposive tautological answer. So this lane would need to be fixed up.
 6973 bool TautologicalInvertedLane = D.ule(Cmp);
 6974 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
 6975
 6976 // If all lanes are tautological (either all divisors are ones, or divisor
 6977 // is not greater than the constant we are comparing with),
 6978 // we will prefer to avoid the fold.
 6979 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
 6980 HadTautologicalLanes |= TautologicalLane;
 6981 AllLanesAreTautological &= TautologicalLane;
 6982
 6983 // If we are comparing with non-zero, we need'll need to subtract said
 6984 // comparison value from the LHS. But there is no point in doing that if
 6985 // every lane where we are comparing with non-zero is tautological..
 6986 if (!Cmp.isZero())
 6987 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
 6988
 6989 // Decompose D into D0 * 2^K
 6990 unsigned K = D.countr_zero();
 6991 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
 6992 APInt D0 = D.lshr(K);
 6993
 6994 // D is even if it has trailing zeros.
 6995 HadEvenDivisor |= (K != 0);
 6996 // D is a power-of-two if D0 is one.
 6997 // If all divisors are power-of-two, we will prefer to avoid the fold.
 6998 AllDivisorsArePowerOfTwo &= D0.isOne();
 6999
 7000 // P = inv(D0, 2^W)
 7001 // 2^W requires W + 1 bits, so we have to extend and then truncate.
 7002 unsigned W = D.getBitWidth();
 7003 APInt P = D0.multiplicativeInverse();
 7004 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
 7005
 7006 // Q = floor((2^W - 1) u/ D)
 7007 // R = ((2^W - 1) u% D)
 7008 APInt Q, R;
// NOTE(review): the line computing Q and R (presumably an APInt udivrem of
// the all-ones value by D) is missing from this extract; confirm upstream.
 7010
 7011 // If we are comparing with zero, then that comparison constant is okay,
 7012 // else it may need to be one less than that.
 7013 if (Cmp.ugt(R))
 7014 Q -= 1;
 7015
// NOTE(review): the first line of this assert (its condition) is missing
// from this extract; only the message string survives.
 7017 "We are expecting that K is always less than all-ones for ShSVT");
 7018
 7019 // If the lane is tautological the result can be constant-folded.
 7020 if (TautologicalLane) {
 7021 // Set P and K amount to a bogus values so we can try to splat them.
 7022 P = 0;
 7023 K = -1;
 7024 // And ensure that comparison constant is tautological,
 7025 // it will always compare true/false.
 7026 Q = -1;
 7027 }
 7028
 7029 PAmts.push_back(DAG.getConstant(P, DL, SVT));
 7030 KAmts.push_back(
 7031 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
 7032 /*implicitTrunc=*/true),
 7033 DL, ShSVT));
 7034 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
 7035 return true;
 7036 };
 7037
 7038 SDValue N = REMNode.getOperand(0);
 7039 SDValue D = REMNode.getOperand(1);
 7040
 7041 // Collect the values from each element.
 7042 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
 7043 return SDValue();
 7044
 7045 // If all lanes are tautological, the result can be constant-folded.
 7046 if (AllLanesAreTautological)
 7047 return SDValue();
 7048
 7049 // If this is a urem by a powers-of-two, avoid the fold since it can be
 7050 // best implemented as a bit test.
 7051 if (AllDivisorsArePowerOfTwo)
 7052 return SDValue();
 7053
 7054 SDValue PVal, KVal, QVal;
 7055 if (D.getOpcode() == ISD::BUILD_VECTOR) {
 7056 if (HadTautologicalLanes) {
 7057 // Try to turn PAmts into a splat, since we don't care about the values
 7058 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): the call lines performing the splat rewrites of PAmts and
// KAmts (presumably via the splat-replacement helper above) are missing
// from this extract; confirm upstream.
 7060 // Try to turn KAmts into a splat, since we don't care about the values
 7061 // that are currently '-1'. If we can't, change them to '0'`s.
 7063 DAG.getConstant(0, DL, ShSVT));
 7064 }
 7065
 7066 PVal = DAG.getBuildVector(VT, DL, PAmts);
 7067 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
 7068 QVal = DAG.getBuildVector(VT, DL, QAmts);
 7069 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
 7070 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
 7071 "Expected matchBinaryPredicate to return one element for "
 7072 "SPLAT_VECTORs");
 7073 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
 7074 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
 7075 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
 7076 } else {
 7077 PVal = PAmts[0];
 7078 KVal = KAmts[0];
 7079 QVal = QAmts[0];
 7080 }
 7081
// Non-zero comparison targets are folded into the LHS first: (x - Cmp).
 7082 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
 7083 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
 7084 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
 7085 assert(CompTargetNode.getValueType() == N.getValueType() &&
 7086 "Expecting that the types on LHS and RHS of comparisons match.");
 7087 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
 7088 }
 7089
 7090 // (mul N, P)
 7091 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
 7092 Created.push_back(Op0.getNode());
 7093
 7094 // Rotate right only if any divisor was even. We avoid rotates for all-odd
 7095 // divisors as a performance improvement, since rotating by 0 is a no-op.
 7096 if (HadEvenDivisor) {
 7097 // We need ROTR to do this.
 7098 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
 7099 return SDValue();
 7100 // UREM: (rotr (mul N, P), K)
 7101 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
 7102 Created.push_back(Op0.getNode());
 7103 }
 7104
 7105 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
// NOTE(review): the final argument line of this getSetCC (the condition
// code chosen from Cond) is missing from this extract; confirm upstream.
 7106 SDValue NewCC =
 7107 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
 7109 if (!HadTautologicalInvertedLanes)
 7110 return NewCC;
 7111
 7112 // If any lanes previously compared always-false, the NewCC will give
 7113 // always-true result for them, so we need to fixup those lanes.
 7114 // Or the other way around for inequality predicate.
 7115 assert(VT.isVector() && "Can/should only get here for vectors.");
 7116 Created.push_back(NewCC.getNode());
 7117
 7118 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
 7119 // if C2 is not less than C1, the comparison is always false.
 7120 // But we have produced the comparison that will give the
 7121 // opposive tautological answer. So these lanes would need to be fixed up.
 7122 SDValue TautologicalInvertedChannels =
 7123 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
 7124 Created.push_back(TautologicalInvertedChannels.getNode());
 7125
 7126 // NOTE: we avoid letting illegal types through even if we're before legalize
 7127 // ops – legalization has a hard time producing good code for this.
 7128 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
 7129 // If we have a vector select, let's replace the comparison results in the
 7130 // affected lanes with the correct tautological result.
 7131 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
 7132 DL, SETCCVT, SETCCVT);
 7133 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
 7134 Replacement, NewCC);
 7135 }
 7136
 7137 // Else, we can just invert the comparison result in the appropriate lanes.
 7138 //
 7139 // NOTE: see the note above VSELECT above.
 7140 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
 7141 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
 7142 TautologicalInvertedChannels);
 7143
 7144 return SDValue(); // Don't know how to lower.
 7145}
7146
7147/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7148/// where the divisor is constant and the comparison target is zero,
7149/// return a DAG expression that will generate the same comparison result
7150/// using only multiplications, additions and shifts/rotations.
7151/// Ref: "Hacker's Delight" 10-17.
7152SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7153 SDValue CompTargetNode,
7155 DAGCombinerInfo &DCI,
7156 const SDLoc &DL) const {
7158 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7159 DCI, DL, Built)) {
7160 assert(Built.size() <= 7 && "Max size prediction failed.");
7161 for (SDNode *N : Built)
7162 DCI.AddToWorklist(N);
7163 return Folded;
7164 }
7165
7166 return SDValue();
7167}
7168
7169SDValue
7170TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7171 SDValue CompTargetNode, ISD::CondCode Cond,
7172 DAGCombinerInfo &DCI, const SDLoc &DL,
7173 SmallVectorImpl<SDNode *> &Created) const {
7174 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7175 // Fold:
7176 // (seteq/ne (srem N, D), 0)
7177 // To:
7178 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7179 //
7180 // - D must be constant, with D = D0 * 2^K where D0 is odd
7181 // - P is the multiplicative inverse of D0 modulo 2^W
7182 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7183 // - Q = floor((2 * A) / (2^K))
7184 // where W is the width of the common type of N and D.
7185 //
7186 // When D is a power of two (and thus D0 is 1), the normal
7187 // formula for A and Q don't apply, because the derivation
7188 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7189 // does not apply. This specifically fails when N = INT_MIN.
7190 //
7191 // Instead, for power-of-two D, we use:
7192 // - A = 2^(W-1)
7193 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7194 // - Q = 2^(W-K) - 1
7195 // |-> Test that the top K bits are zero after rotation
7196 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7197 "Only applicable for (in)equality comparisons.");
7198
7199 SelectionDAG &DAG = DCI.DAG;
7200
7201 EVT VT = REMNode.getValueType();
7202 EVT SVT = VT.getScalarType();
7203 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7204 EVT ShSVT = ShVT.getScalarType();
7205
7206 // If we are after ops legalization, and MUL is unavailable, we can not
7207 // proceed.
7208 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7209 return SDValue();
7210
7211 // TODO: Could support comparing with non-zero too.
7212 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7213 if (!CompTarget || !CompTarget->isZero())
7214 return SDValue();
7215
7216 bool HadIntMinDivisor = false;
7217 bool HadOneDivisor = false;
7218 bool AllDivisorsAreOnes = true;
7219 bool HadEvenDivisor = false;
7220 bool NeedToApplyOffset = false;
7221 bool AllDivisorsArePowerOfTwo = true;
7222 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7223
7224 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7225 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7226 if (C->isZero())
7227 return false;
7228
7229 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7230
7231 // WARNING: this fold is only valid for positive divisors!
7232 APInt D = C->getAPIntValue();
7233 if (D.isNegative())
7234 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7235
7236 HadIntMinDivisor |= D.isMinSignedValue();
7237
7238 // If all divisors are ones, we will prefer to avoid the fold.
7239 HadOneDivisor |= D.isOne();
7240 AllDivisorsAreOnes &= D.isOne();
7241
7242 // Decompose D into D0 * 2^K
7243 unsigned K = D.countr_zero();
7244 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7245 APInt D0 = D.lshr(K);
7246
7247 if (!D.isMinSignedValue()) {
7248 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7249 // we don't care about this lane in this fold, we'll special-handle it.
7250 HadEvenDivisor |= (K != 0);
7251 }
7252
7253 // D is a power-of-two if D0 is one. This includes INT_MIN.
7254 // If all divisors are power-of-two, we will prefer to avoid the fold.
7255 AllDivisorsArePowerOfTwo &= D0.isOne();
7256
7257 // P = inv(D0, 2^W)
7258 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7259 unsigned W = D.getBitWidth();
7260 APInt P = D0.multiplicativeInverse();
7261 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7262
7263 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7264 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7265 A.clearLowBits(K);
7266
7267 if (!D.isMinSignedValue()) {
7268 // If divisor INT_MIN, then we don't care about this lane in this fold,
7269 // we'll special-handle it.
7270 NeedToApplyOffset |= A != 0;
7271 }
7272
7273 // Q = floor((2 * A) / (2^K))
7274 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7275
7277 "We are expecting that A is always less than all-ones for SVT");
7279 "We are expecting that K is always less than all-ones for ShSVT");
7280
7281 // If D was a power of two, apply the alternate constant derivation.
7282 if (D0.isOne()) {
7283 // A = 2^(W-1)
7285 // - Q = 2^(W-K) - 1
7286 Q = APInt::getAllOnes(W - K).zext(W);
7287 }
7288
7289 // If the divisor is 1 the result can be constant-folded. Likewise, we
7290 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7291 if (D.isOne()) {
7292 // Set P, A and K to a bogus values so we can try to splat them.
7293 P = 0;
7294 A = -1;
7295 K = -1;
7296
7297 // x ?% 1 == 0 <--> true <--> x u<= -1
7298 Q = -1;
7299 }
7300
7301 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7302 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7303 KAmts.push_back(
7304 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7305 /*implicitTrunc=*/true),
7306 DL, ShSVT));
7307 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7308 return true;
7309 };
7310
7311 SDValue N = REMNode.getOperand(0);
7312 SDValue D = REMNode.getOperand(1);
7313
7314 // Collect the values from each element.
7315 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7316 return SDValue();
7317
7318 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7319 if (AllDivisorsAreOnes)
7320 return SDValue();
7321
7322 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7323 // since it can be best implemented as a bit test.
7324 if (AllDivisorsArePowerOfTwo)
7325 return SDValue();
7326
7327 SDValue PVal, AVal, KVal, QVal;
7328 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7329 if (HadOneDivisor) {
7330 // Try to turn PAmts into a splat, since we don't care about the values
7331 // that are currently '0'. If we can't, just keep '0'`s.
7333 // Try to turn AAmts into a splat, since we don't care about the
7334 // values that are currently '-1'. If we can't, change them to '0'`s.
7336 DAG.getConstant(0, DL, SVT));
7337 // Try to turn KAmts into a splat, since we don't care about the values
7338 // that are currently '-1'. If we can't, change them to '0'`s.
7340 DAG.getConstant(0, DL, ShSVT));
7341 }
7342
7343 PVal = DAG.getBuildVector(VT, DL, PAmts);
7344 AVal = DAG.getBuildVector(VT, DL, AAmts);
7345 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7346 QVal = DAG.getBuildVector(VT, DL, QAmts);
7347 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7348 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7349 QAmts.size() == 1 &&
7350 "Expected matchUnaryPredicate to return one element for scalable "
7351 "vectors");
7352 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7353 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7354 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7355 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7356 } else {
7357 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7358 PVal = PAmts[0];
7359 AVal = AAmts[0];
7360 KVal = KAmts[0];
7361 QVal = QAmts[0];
7362 }
7363
7364 // (mul N, P)
7365 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7366 Created.push_back(Op0.getNode());
7367
7368 if (NeedToApplyOffset) {
7369 // We need ADD to do this.
7370 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7371 return SDValue();
7372
7373 // (add (mul N, P), A)
7374 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7375 Created.push_back(Op0.getNode());
7376 }
7377
7378 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7379 // divisors as a performance improvement, since rotating by 0 is a no-op.
7380 if (HadEvenDivisor) {
7381 // We need ROTR to do this.
7382 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7383 return SDValue();
7384 // SREM: (rotr (add (mul N, P), A), K)
7385 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7386 Created.push_back(Op0.getNode());
7387 }
7388
7389 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7390 SDValue Fold =
7391 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7393
7394 // If we didn't have lanes with INT_MIN divisor, then we're done.
7395 if (!HadIntMinDivisor)
7396 return Fold;
7397
7398 // That fold is only valid for positive divisors. Which effectively means,
7399 // it is invalid for INT_MIN divisors. So if we have such a lane,
7400 // we must fix-up results for said lanes.
7401 assert(VT.isVector() && "Can/should only get here for vectors.");
7402
7403 // NOTE: we avoid letting illegal types through even if we're before legalize
7404 // ops – legalization has a hard time producing good code for the code that
7405 // follows.
7406 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7410 return SDValue();
7411
7412 Created.push_back(Fold.getNode());
7413
7414 SDValue IntMin = DAG.getConstant(
7416 SDValue IntMax = DAG.getConstant(
7418 SDValue Zero =
7420
7421 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7422 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7423 Created.push_back(DivisorIsIntMin.getNode());
7424
7425 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7426 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7427 Created.push_back(Masked.getNode());
7428 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7429 Created.push_back(MaskedIsZero.getNode());
7430
7431 // To produce final result we need to blend 2 vectors: 'SetCC' and
7432 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7433 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7434 // constant-folded, select can get lowered to a shuffle with constant mask.
7435 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7436 MaskedIsZero, Fold);
7437
7438 return Blended;
7439}
7440
7442 const DenormalMode &Mode) const {
7443 SDLoc DL(Op);
7444 EVT VT = Op.getValueType();
7445 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7446 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7447
7448 // This is specifically a check for the handling of denormal inputs, not the
7449 // result.
7450 if (Mode.Input == DenormalMode::PreserveSign ||
7451 Mode.Input == DenormalMode::PositiveZero) {
7452 // Test = X == 0.0
7453 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7454 }
7455
7456 // Testing it with denormal inputs to avoid wrong estimate.
7457 //
7458 // Test = fabs(X) < SmallestNormal
7459 const fltSemantics &FltSem = VT.getFltSemantics();
7460 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7461 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7462 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7463 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7464}
7465
7467 bool LegalOps, bool OptForSize,
7469 unsigned Depth) const {
7470 // fneg is removable even if it has multiple uses.
7471 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7473 return Op.getOperand(0);
7474 }
7475
7476 // Don't recurse exponentially.
7478 return SDValue();
7479
7480 // Pre-increment recursion depth for use in recursive calls.
7481 ++Depth;
7482 const SDNodeFlags Flags = Op->getFlags();
7483 const TargetOptions &Options = DAG.getTarget().Options;
7484 EVT VT = Op.getValueType();
7485 unsigned Opcode = Op.getOpcode();
7486
7487 // Don't allow anything with multiple uses unless we know it is free.
7488 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7489 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7490 isFPExtFree(VT, Op.getOperand(0).getValueType());
7491 if (!IsFreeExtend)
7492 return SDValue();
7493 }
7494
7495 auto RemoveDeadNode = [&](SDValue N) {
7496 if (N && N.getNode()->use_empty())
7497 DAG.RemoveDeadNode(N.getNode());
7498 };
7499
7500 SDLoc DL(Op);
7501
7502 // Because getNegatedExpression can delete nodes we need a handle to keep
7503 // temporary nodes alive in case the recursion manages to create an identical
7504 // node.
7505 std::list<HandleSDNode> Handles;
7506
7507 switch (Opcode) {
7508 case ISD::ConstantFP: {
7509 // Don't invert constant FP values after legalization unless the target says
7510 // the negated constant is legal.
7511 bool IsOpLegal =
7513 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7514 OptForSize);
7515
7516 if (LegalOps && !IsOpLegal)
7517 break;
7518
7519 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7520 V.changeSign();
7521 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7522
7523 // If we already have the use of the negated floating constant, it is free
7524 // to negate it even it has multiple uses.
7525 if (!Op.hasOneUse() && CFP.use_empty())
7526 break;
7528 return CFP;
7529 }
7530 case ISD::BUILD_VECTOR: {
7531 // Only permit BUILD_VECTOR of constants.
7532 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7533 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7534 }))
7535 break;
7536
7537 bool IsOpLegal =
7540 llvm::all_of(Op->op_values(), [&](SDValue N) {
7541 return N.isUndef() ||
7542 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7543 OptForSize);
7544 });
7545
7546 if (LegalOps && !IsOpLegal)
7547 break;
7548
7550 for (SDValue C : Op->op_values()) {
7551 if (C.isUndef()) {
7552 Ops.push_back(C);
7553 continue;
7554 }
7555 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7556 V.changeSign();
7557 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7558 }
7560 return DAG.getBuildVector(VT, DL, Ops);
7561 }
7562 case ISD::FADD: {
7563 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7564 break;
7565
7566 // After operation legalization, it might not be legal to create new FSUBs.
7567 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7568 break;
7569 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7570
7571 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7573 SDValue NegX =
7574 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7575 // Prevent this node from being deleted by the next call.
7576 if (NegX)
7577 Handles.emplace_back(NegX);
7578
7579 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7581 SDValue NegY =
7582 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7583
7584 // We're done with the handles.
7585 Handles.clear();
7586
7587 // Negate the X if its cost is less or equal than Y.
7588 if (NegX && (CostX <= CostY)) {
7589 Cost = CostX;
7590 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7591 if (NegY != N)
7592 RemoveDeadNode(NegY);
7593 return N;
7594 }
7595
7596 // Negate the Y if it is not expensive.
7597 if (NegY) {
7598 Cost = CostY;
7599 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7600 if (NegX != N)
7601 RemoveDeadNode(NegX);
7602 return N;
7603 }
7604 break;
7605 }
7606 case ISD::FSUB: {
7607 // We can't turn -(A-B) into B-A when we honor signed zeros.
7608 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7609 break;
7610
7611 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7612 // fold (fneg (fsub 0, Y)) -> Y
7613 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7614 if (C->isZero()) {
7616 return Y;
7617 }
7618
7619 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7621 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7622 }
7623 case ISD::FMUL:
7624 case ISD::FDIV: {
7625 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7626
7627 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7629 SDValue NegX =
7630 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7631 // Prevent this node from being deleted by the next call.
7632 if (NegX)
7633 Handles.emplace_back(NegX);
7634
7635 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7637 SDValue NegY =
7638 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7639
7640 // We're done with the handles.
7641 Handles.clear();
7642
7643 // Negate the X if its cost is less or equal than Y.
7644 if (NegX && (CostX <= CostY)) {
7645 Cost = CostX;
7646 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7647 if (NegY != N)
7648 RemoveDeadNode(NegY);
7649 return N;
7650 }
7651
7652 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7653 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7654 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7655 break;
7656
7657 // Negate the Y if it is not expensive.
7658 if (NegY) {
7659 Cost = CostY;
7660 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7661 if (NegX != N)
7662 RemoveDeadNode(NegX);
7663 return N;
7664 }
7665 break;
7666 }
7667 case ISD::FMA:
7668 case ISD::FMAD: {
7669 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7670 break;
7671
7672 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7674 SDValue NegZ =
7675 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7676 // Give up if fail to negate the Z.
7677 if (!NegZ)
7678 break;
7679
7680 // Prevent this node from being deleted by the next two calls.
7681 Handles.emplace_back(NegZ);
7682
7683 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7685 SDValue NegX =
7686 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7687 // Prevent this node from being deleted by the next call.
7688 if (NegX)
7689 Handles.emplace_back(NegX);
7690
7691 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7693 SDValue NegY =
7694 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7695
7696 // We're done with the handles.
7697 Handles.clear();
7698
7699 // Negate the X if its cost is less or equal than Y.
7700 if (NegX && (CostX <= CostY)) {
7701 Cost = std::min(CostX, CostZ);
7702 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7703 if (NegY != N)
7704 RemoveDeadNode(NegY);
7705 return N;
7706 }
7707
7708 // Negate the Y if it is not expensive.
7709 if (NegY) {
7710 Cost = std::min(CostY, CostZ);
7711 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7712 if (NegX != N)
7713 RemoveDeadNode(NegX);
7714 return N;
7715 }
7716 break;
7717 }
7718
7719 case ISD::FP_EXTEND:
7720 case ISD::FSIN:
7721 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7722 OptForSize, Cost, Depth))
7723 return DAG.getNode(Opcode, DL, VT, NegV);
7724 break;
7725 case ISD::FP_ROUND:
7726 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7727 OptForSize, Cost, Depth))
7728 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7729 break;
7730 case ISD::SELECT:
7731 case ISD::VSELECT: {
7732 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7733 // iff at least one cost is cheaper and the other is neutral/cheaper
7734 SDValue LHS = Op.getOperand(1);
7736 SDValue NegLHS =
7737 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7738 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7739 RemoveDeadNode(NegLHS);
7740 break;
7741 }
7742
7743 // Prevent this node from being deleted by the next call.
7744 Handles.emplace_back(NegLHS);
7745
7746 SDValue RHS = Op.getOperand(2);
7748 SDValue NegRHS =
7749 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7750
7751 // We're done with the handles.
7752 Handles.clear();
7753
7754 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7755 (CostLHS != NegatibleCost::Cheaper &&
7756 CostRHS != NegatibleCost::Cheaper)) {
7757 RemoveDeadNode(NegLHS);
7758 RemoveDeadNode(NegRHS);
7759 break;
7760 }
7761
7762 Cost = std::min(CostLHS, CostRHS);
7763 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7764 }
7765 }
7766
7767 return SDValue();
7768}
7769
7770//===----------------------------------------------------------------------===//
7771// Legalization Utilities
7772//===----------------------------------------------------------------------===//
7773
7774bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7775 SDValue LHS, SDValue RHS,
7777 EVT HiLoVT, SelectionDAG &DAG,
7778 MulExpansionKind Kind, SDValue LL,
7779 SDValue LH, SDValue RL, SDValue RH) const {
7780 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7781 Opcode == ISD::SMUL_LOHI);
7782
7783 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7785 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7787 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7789 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7791
7792 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7793 return false;
7794
7795 unsigned OuterBitSize = VT.getScalarSizeInBits();
7796 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7797
7798 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7799 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7800 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7801
7802 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7803 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7804 bool Signed) -> bool {
7805 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7806 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7807 Hi = SDValue(Lo.getNode(), 1);
7808 return true;
7809 }
7810 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7811 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7812 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7813 return true;
7814 }
7815 return false;
7816 };
7817
7818 SDValue Lo, Hi;
7819
7820 if (!LL.getNode() && !RL.getNode() &&
7822 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7823 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7824 }
7825
7826 if (!LL.getNode())
7827 return false;
7828
7829 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7830 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7831 DAG.MaskedValueIsZero(RHS, HighMask)) {
7832 // The inputs are both zero-extended.
7833 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7834 Result.push_back(Lo);
7835 Result.push_back(Hi);
7836 if (Opcode != ISD::MUL) {
7837 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7838 Result.push_back(Zero);
7839 Result.push_back(Zero);
7840 }
7841 return true;
7842 }
7843 }
7844
7845 if (!VT.isVector() && Opcode == ISD::MUL &&
7846 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7847 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7848 // The input values are both sign-extended.
7849 // TODO non-MUL case?
7850 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7851 Result.push_back(Lo);
7852 Result.push_back(Hi);
7853 return true;
7854 }
7855 }
7856
7857 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7858 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7859
7860 if (!LH.getNode() && !RH.getNode() &&
7863 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7864 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7865 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7866 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7867 }
7868
7869 if (!LH.getNode())
7870 return false;
7871
7872 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7873 return false;
7874
7875 Result.push_back(Lo);
7876
7877 if (Opcode == ISD::MUL) {
7878 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7879 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7880 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7881 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7882 Result.push_back(Hi);
7883 return true;
7884 }
7885
7886 // Compute the full width result.
7887 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7888 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7889 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7890 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7891 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7892 };
7893
7894 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7895 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7896 return false;
7897
7898 // This is effectively the add part of a multiply-add of half-sized operands,
7899 // so it cannot overflow.
7900 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7901
7902 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7903 return false;
7904
7905 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7906 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7907
7908 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7910 if (UseGlue)
7911 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7912 Merge(Lo, Hi));
7913 else
7914 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7915 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7916
7917 SDValue Carry = Next.getValue(1);
7918 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7919 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7920
7921 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7922 return false;
7923
7924 if (UseGlue)
7925 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7926 Carry);
7927 else
7928 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7929 Zero, Carry);
7930
7931 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7932
7933 if (Opcode == ISD::SMUL_LOHI) {
7934 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7935 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7936 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7937
7938 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7939 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7940 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7941 }
7942
7943 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7944 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7945 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7946 return true;
7947}
7948
7950 SelectionDAG &DAG, MulExpansionKind Kind,
7951 SDValue LL, SDValue LH, SDValue RL,
7952 SDValue RH) const {
7954 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7955 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7956 DAG, Kind, LL, LH, RL, RH);
7957 if (Ok) {
7958 assert(Result.size() == 2);
7959 Lo = Result[0];
7960 Hi = Result[1];
7961 }
7962 return Ok;
7963}
7964
7965// Optimize unsigned division or remainder by constants for types twice as large
7966// as a legal VT.
7967//
7968// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7969// can be computed
7970// as:
7971// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7972// Remainder = Sum % Constant
7973// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7974//
7975// For division, we can compute the remainder using the algorithm described
7976// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7978// (1 << (BitWidth / 2)) to get the quotient.
7979
7980// If Constant is even, we can shift right the dividend and the divisor by the
7981// number of trailing zeros in Constant before applying the remainder algorithm.
7982// If we're after the quotient, we can subtract this value from the shifted
7983// dividend and multiply by the multiplicative inverse of the shifted divisor.
7984// If we want the remainder, we shift the value left by the number of trailing
7985// zeros and add the bits that were shifted out of the dividend.
7988 EVT HiLoVT, SelectionDAG &DAG,
7989 SDValue LL, SDValue LH) const {
7990 unsigned Opcode = N->getOpcode();
7991 EVT VT = N->getValueType(0);
7992
7993 // TODO: Support signed division/remainder.
7994 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7995 return false;
7996 assert(
7997 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7998 "Unexpected opcode");
7999
8000 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8001 if (!CN)
8002 return false;
8003
8004 APInt Divisor = CN->getAPIntValue();
8005 unsigned BitWidth = Divisor.getBitWidth();
8006 unsigned HBitWidth = BitWidth / 2;
8008 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8009
8010 // Divisor needs to less than (1 << HBitWidth).
8011 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8012 if (Divisor.uge(HalfMaxPlus1))
8013 return false;
8014
8015 // We depend on the UREM by constant optimization in DAGCombiner that requires
8016 // high multiply.
8017 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8019 return false;
8020
8021 // Don't expand if optimizing for size.
8022 if (DAG.shouldOptForSize())
8023 return false;
8024
8025 // Early out for 0 or 1 divisors.
8026 if (Divisor.ule(1))
8027 return false;
8028
8029 // If the divisor is even, shift it until it becomes odd.
8030 unsigned TrailingZeros = 0;
8031 if (!Divisor[0]) {
8032 TrailingZeros = Divisor.countr_zero();
8033 Divisor.lshrInPlace(TrailingZeros);
8034 }
8035
8036 SDLoc dl(N);
8037 SDValue Sum;
8038 SDValue PartialRem;
8039
8040 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8041 // then add in the carry.
8042 // TODO: If we can't split it in half, we might be able to split into 3 or
8043 // more pieces using a smaller bit width.
8044 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8045 assert(!LL == !LH && "Expected both input halves or no input halves!");
8046 if (!LL)
8047 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8048
8049 // Shift the input by the number of TrailingZeros in the divisor. The
8050 // shifted out bits will be added to the remainder later.
8051 if (TrailingZeros) {
8052 // Save the shifted off bits if we need the remainder.
8053 if (Opcode != ISD::UDIV) {
8054 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8055 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8056 DAG.getConstant(Mask, dl, HiLoVT));
8057 }
8058
8059 LL = DAG.getNode(
8060 ISD::OR, dl, HiLoVT,
8061 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8062 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8063 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8064 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8065 HiLoVT, dl)));
8066 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8067 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8068 }
8069
8070 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8071 EVT SetCCType =
8072 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8074 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8075 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8076 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8077 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8078 } else {
8079 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8080 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8081 // If the boolean for the target is 0 or 1, we can add the setcc result
8082 // directly.
8083 if (getBooleanContents(HiLoVT) ==
8085 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8086 else
8087 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8088 DAG.getConstant(0, dl, HiLoVT));
8089 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8090 }
8091 }
8092
8093 // If we didn't find a sum, we can't do the expansion.
8094 if (!Sum)
8095 return false;
8096
8097 // Perform a HiLoVT urem on the Sum using truncated divisor.
8098 SDValue RemL =
8099 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8100 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8101 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8102
8103 if (Opcode != ISD::UREM) {
8104 // Subtract the remainder from the shifted dividend.
8105 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8106 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8107
8108 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8109
8110 // Multiply by the multiplicative inverse of the divisor modulo
8111 // (1 << BitWidth).
8112 APInt MulFactor = Divisor.multiplicativeInverse();
8113
8114 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8115 DAG.getConstant(MulFactor, dl, VT));
8116
8117 // Split the quotient into low and high parts.
8118 SDValue QuotL, QuotH;
8119 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8120 Result.push_back(QuotL);
8121 Result.push_back(QuotH);
8122 }
8123
8124 if (Opcode != ISD::UDIV) {
8125 // If we shifted the input, shift the remainder left and add the bits we
8126 // shifted off the input.
8127 if (TrailingZeros) {
8128 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8129 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8130 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8131 }
8132 Result.push_back(RemL);
8133 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8134 }
8135
8136 return true;
8137}
8138
8139// Check that (every element of) Z is undef or not an exact multiple of BW.
8140static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8142 Z,
8143 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8144 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8145}
8146
8148 EVT VT = Node->getValueType(0);
8149 SDValue ShX, ShY;
8150 SDValue ShAmt, InvShAmt;
8151 SDValue X = Node->getOperand(0);
8152 SDValue Y = Node->getOperand(1);
8153 SDValue Z = Node->getOperand(2);
8154 SDValue Mask = Node->getOperand(3);
8155 SDValue VL = Node->getOperand(4);
8156
8157 unsigned BW = VT.getScalarSizeInBits();
8158 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8159 SDLoc DL(SDValue(Node, 0));
8160
8161 EVT ShVT = Z.getValueType();
8162 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8163 // fshl: X << C | Y >> (BW - C)
8164 // fshr: X << (BW - C) | Y >> C
8165 // where C = Z % BW is not zero
8166 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8167 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8168 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8169 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8170 VL);
8171 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8172 VL);
8173 } else {
8174 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8175 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8176 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8177 if (isPowerOf2_32(BW)) {
8178 // Z % BW -> Z & (BW - 1)
8179 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8180 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8181 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8182 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8183 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8184 } else {
8185 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8186 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8187 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8188 }
8189
8190 SDValue One = DAG.getConstant(1, DL, ShVT);
8191 if (IsFSHL) {
8192 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8193 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8194 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8195 } else {
8196 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8197 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8198 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8199 }
8200 }
8201 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8202}
8203
8205 SelectionDAG &DAG) const {
8206 if (Node->isVPOpcode())
8207 return expandVPFunnelShift(Node, DAG);
8208
8209 EVT VT = Node->getValueType(0);
8210
8211 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8215 return SDValue();
8216
8217 SDValue X = Node->getOperand(0);
8218 SDValue Y = Node->getOperand(1);
8219 SDValue Z = Node->getOperand(2);
8220
8221 unsigned BW = VT.getScalarSizeInBits();
8222 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8223 SDLoc DL(SDValue(Node, 0));
8224
8225 EVT ShVT = Z.getValueType();
8226
8227 // If a funnel shift in the other direction is more supported, use it.
8228 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8229 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8230 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8231 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8232 // fshl X, Y, Z -> fshr X, Y, -Z
8233 // fshr X, Y, Z -> fshl X, Y, -Z
8234 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8235 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8236 } else {
8237 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8238 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8239 SDValue One = DAG.getConstant(1, DL, ShVT);
8240 if (IsFSHL) {
8241 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8242 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8243 } else {
8244 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8245 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8246 }
8247 Z = DAG.getNOT(DL, Z, ShVT);
8248 }
8249 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8250 }
8251
8252 SDValue ShX, ShY;
8253 SDValue ShAmt, InvShAmt;
8254 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8255 // fshl: X << C | Y >> (BW - C)
8256 // fshr: X << (BW - C) | Y >> C
8257 // where C = Z % BW is not zero
8258 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8259 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8260 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8261 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8262 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8263 } else {
8264 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8265 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8266 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8267 if (isPowerOf2_32(BW)) {
8268 // Z % BW -> Z & (BW - 1)
8269 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8270 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8271 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8272 } else {
8273 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8274 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8275 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8276 }
8277
8278 SDValue One = DAG.getConstant(1, DL, ShVT);
8279 if (IsFSHL) {
8280 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8281 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8282 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8283 } else {
8284 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8285 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8286 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8287 }
8288 }
8289 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8290}
8291
8292// TODO: Merge with expandFunnelShift.
8294 SelectionDAG &DAG) const {
8295 EVT VT = Node->getValueType(0);
8296 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8297 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8298 SDValue Op0 = Node->getOperand(0);
8299 SDValue Op1 = Node->getOperand(1);
8300 SDLoc DL(SDValue(Node, 0));
8301
8302 EVT ShVT = Op1.getValueType();
8303 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8304
8305 // If a rotate in the other direction is more supported, use it.
8306 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8307 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8308 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8309 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8310 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8311 }
8312
8313 if (!AllowVectorOps && VT.isVector() &&
8319 return SDValue();
8320
8321 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8322 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8323 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8324 SDValue ShVal;
8325 SDValue HsVal;
8326 if (isPowerOf2_32(EltSizeInBits)) {
8327 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8328 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8329 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8330 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8331 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8332 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8333 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8334 } else {
8335 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8336 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8337 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8338 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8339 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8340 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8341 SDValue One = DAG.getConstant(1, DL, ShVT);
8342 HsVal =
8343 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8344 }
8345 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8346}
8347
8349 SelectionDAG &DAG) const {
8350 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8351 EVT VT = Node->getValueType(0);
8352 unsigned VTBits = VT.getScalarSizeInBits();
8353 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8354
8355 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8356 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8357 SDValue ShOpLo = Node->getOperand(0);
8358 SDValue ShOpHi = Node->getOperand(1);
8359 SDValue ShAmt = Node->getOperand(2);
8360 EVT ShAmtVT = ShAmt.getValueType();
8361 EVT ShAmtCCVT =
8362 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8363 SDLoc dl(Node);
8364
8365 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8366 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8367 // away during isel.
8368 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8369 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8370 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8371 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8372 : DAG.getConstant(0, dl, VT);
8373
8374 SDValue Tmp2, Tmp3;
8375 if (IsSHL) {
8376 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8377 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8378 } else {
8379 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8380 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8381 }
8382
8383 // If the shift amount is larger or equal than the width of a part we don't
8384 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8385 // values for large shift amounts.
8386 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8387 DAG.getConstant(VTBits, dl, ShAmtVT));
8388 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8389 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8390
8391 if (IsSHL) {
8392 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8393 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8394 } else {
8395 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8396 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8397 }
8398}
8399
8401 SelectionDAG &DAG) const {
8402 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8403 SDValue Src = Node->getOperand(OpNo);
8404 EVT SrcVT = Src.getValueType();
8405 EVT DstVT = Node->getValueType(0);
8406 SDLoc dl(SDValue(Node, 0));
8407
8408 // FIXME: Only f32 to i64 conversions are supported.
8409 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8410 return false;
8411
8412 if (Node->isStrictFPOpcode())
8413 // When a NaN is converted to an integer a trap is allowed. We can't
8414 // use this expansion here because it would eliminate that trap. Other
8415 // traps are also allowed and cannot be eliminated. See
8416 // IEEE 754-2008 sec 5.8.
8417 return false;
8418
8419 // Expand f32 -> i64 conversion
8420 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8421 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8422 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8423 EVT IntVT = SrcVT.changeTypeToInteger();
8424 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8425
8426 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8427 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8428 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8429 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8430 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8431 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8432
8433 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8434
8435 SDValue ExponentBits = DAG.getNode(
8436 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8437 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8438 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8439
8440 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8441 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8442 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8443 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8444
8445 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8446 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8447 DAG.getConstant(0x00800000, dl, IntVT));
8448
8449 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8450
8451 R = DAG.getSelectCC(
8452 dl, Exponent, ExponentLoBit,
8453 DAG.getNode(ISD::SHL, dl, DstVT, R,
8454 DAG.getZExtOrTrunc(
8455 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8456 dl, IntShVT)),
8457 DAG.getNode(ISD::SRL, dl, DstVT, R,
8458 DAG.getZExtOrTrunc(
8459 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8460 dl, IntShVT)),
8461 ISD::SETGT);
8462
8463 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8464 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8465
8466 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8467 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8468 return true;
8469}
8470
8472 SDValue &Chain,
8473 SelectionDAG &DAG) const {
8474 SDLoc dl(SDValue(Node, 0));
8475 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8476 SDValue Src = Node->getOperand(OpNo);
8477
8478 EVT SrcVT = Src.getValueType();
8479 EVT DstVT = Node->getValueType(0);
8480 EVT SetCCVT =
8481 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8482 EVT DstSetCCVT =
8483 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8484
8485 // Only expand vector types if we have the appropriate vector bit operations.
8486 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8488 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8490 return false;
8491
8492 // If the maximum float value is smaller then the signed integer range,
8493 // the destination signmask can't be represented by the float, so we can
8494 // just use FP_TO_SINT directly.
8495 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8496 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8497 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8499 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8500 if (Node->isStrictFPOpcode()) {
8501 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8502 { Node->getOperand(0), Src });
8503 Chain = Result.getValue(1);
8504 } else
8505 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8506 return true;
8507 }
8508
8509 // Don't expand it if there isn't cheap fsub instruction.
8511 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8512 return false;
8513
8514 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8515 SDValue Sel;
8516
8517 if (Node->isStrictFPOpcode()) {
8518 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8519 Node->getOperand(0), /*IsSignaling*/ true);
8520 Chain = Sel.getValue(1);
8521 } else {
8522 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8523 }
8524
8525 bool Strict = Node->isStrictFPOpcode() ||
8526 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8527
8528 if (Strict) {
8529 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8530 // signmask then offset (the result of which should be fully representable).
8531 // Sel = Src < 0x8000000000000000
8532 // FltOfs = select Sel, 0, 0x8000000000000000
8533 // IntOfs = select Sel, 0, 0x8000000000000000
8534 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8535
8536 // TODO: Should any fast-math-flags be set for the FSUB?
8537 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8538 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8539 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8540 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8541 DAG.getConstant(0, dl, DstVT),
8542 DAG.getConstant(SignMask, dl, DstVT));
8543 SDValue SInt;
8544 if (Node->isStrictFPOpcode()) {
8545 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8546 { Chain, Src, FltOfs });
8547 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8548 { Val.getValue(1), Val });
8549 Chain = SInt.getValue(1);
8550 } else {
8551 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8552 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8553 }
8554 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8555 } else {
8556 // Expand based on maximum range of FP_TO_SINT:
8557 // True = fp_to_sint(Src)
8558 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8559 // Result = select (Src < 0x8000000000000000), True, False
8560
8561 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8562 // TODO: Should any fast-math-flags be set for the FSUB?
8563 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8564 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8565 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8566 DAG.getConstant(SignMask, dl, DstVT));
8567 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8568 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8569 }
8570 return true;
8571}
8572
8574 SDValue &Chain, SelectionDAG &DAG) const {
8575 // This transform is not correct for converting 0 when rounding mode is set
8576 // to round toward negative infinity which will produce -0.0. So disable
8577 // under strictfp.
8578 if (Node->isStrictFPOpcode())
8579 return false;
8580
8581 SDValue Src = Node->getOperand(0);
8582 EVT SrcVT = Src.getValueType();
8583 EVT DstVT = Node->getValueType(0);
8584
8585 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8586 // it.
8587 if (Node->getFlags().hasNonNeg() &&
8589 Result =
8590 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8591 return true;
8592 }
8593
8594 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8595 return false;
8596
8597 // Only expand vector types if we have the appropriate vector bit
8598 // operations.
8599 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8604 return false;
8605
8606 SDLoc dl(SDValue(Node, 0));
8607
8608 // Implementation of unsigned i64 to f64 following the algorithm in
8609 // __floatundidf in compiler_rt. This implementation performs rounding
8610 // correctly in all rounding modes with the exception of converting 0
8611 // when rounding toward negative infinity. In that case the fsub will
8612 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8613 // incorrect.
8614 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8615 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8616 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8617 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8618 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8619 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8620
8621 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8622 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8623 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8624 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8625 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8626 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8627 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8628 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8629 return true;
8630}
8631
8632SDValue
8634 SelectionDAG &DAG) const {
8635 unsigned Opcode = Node->getOpcode();
8636 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8637 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8638 "Wrong opcode");
8639
8640 if (Node->getFlags().hasNoNaNs()) {
8641 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8642 EVT VT = Node->getValueType(0);
8643 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8645 VT.isVector())
8646 return SDValue();
8647 SDValue Op1 = Node->getOperand(0);
8648 SDValue Op2 = Node->getOperand(1);
8649 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8650 Node->getFlags());
8651 }
8652
8653 return SDValue();
8654}
8655
8657 SelectionDAG &DAG) const {
8658 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8659 return Expanded;
8660
8661 EVT VT = Node->getValueType(0);
8662 if (VT.isScalableVector())
8664 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8665
8666 SDLoc dl(Node);
8667 unsigned NewOp =
8668 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8669
8670 if (isOperationLegalOrCustom(NewOp, VT)) {
8671 SDValue Quiet0 = Node->getOperand(0);
8672 SDValue Quiet1 = Node->getOperand(1);
8673
8674 if (!Node->getFlags().hasNoNaNs()) {
8675 // Insert canonicalizes if it's possible we need to quiet to get correct
8676 // sNaN behavior.
8677 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8678 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8679 Node->getFlags());
8680 }
8681 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8682 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8683 Node->getFlags());
8684 }
8685 }
8686
8687 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8688 }
8689
8690 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8691 // instead if there are no NaNs and there can't be an incompatible zero
8692 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8693 if ((Node->getFlags().hasNoNaNs() ||
8694 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8695 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8696 (Node->getFlags().hasNoSignedZeros() ||
8697 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8698 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8699 unsigned IEEE2018Op =
8700 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8701 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8702 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8703 Node->getOperand(1), Node->getFlags());
8704 }
8705
8707 return SelCC;
8708
8709 return SDValue();
8710}
8711
8713 SelectionDAG &DAG) const {
8714 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8715 return Expanded;
8716
8717 SDLoc DL(N);
8718 SDValue LHS = N->getOperand(0);
8719 SDValue RHS = N->getOperand(1);
8720 unsigned Opc = N->getOpcode();
8721 EVT VT = N->getValueType(0);
8722 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8723 bool IsMax = Opc == ISD::FMAXIMUM;
8724 SDNodeFlags Flags = N->getFlags();
8725
8726 // First, implement comparison not propagating NaN. If no native fmin or fmax
8727 // available, use plain select with setcc instead.
8729 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8730 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8731
8732 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8733 // signed zero behavior.
8734 bool MinMaxMustRespectOrderedZero = false;
8735
8736 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8737 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8738 MinMaxMustRespectOrderedZero = true;
8739 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8740 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8741 } else {
8743 return DAG.UnrollVectorOp(N);
8744
8745 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8746 SDValue Compare =
8747 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8748 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8749 }
8750
8751 // Propagate any NaN of both operands
8752 if (!N->getFlags().hasNoNaNs() &&
8753 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8754 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8756 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8757 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8758 }
8759
8760 // fminimum/fmaximum requires -0.0 less than +0.0
8761 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8762 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8763 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8764 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8765 SDValue TestZero =
8766 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8767 SDValue LCmp = DAG.getSelect(
8768 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8769 MinMax, Flags);
8770 SDValue RCmp = DAG.getSelect(
8771 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8772 LCmp, Flags);
8773 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8774 }
8775
8776 return MinMax;
8777}
8778
8780 SelectionDAG &DAG) const {
8781 SDLoc DL(Node);
8782 SDValue LHS = Node->getOperand(0);
8783 SDValue RHS = Node->getOperand(1);
8784 unsigned Opc = Node->getOpcode();
8785 EVT VT = Node->getValueType(0);
8786 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8787 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8788 const TargetOptions &Options = DAG.getTarget().Options;
8789 SDNodeFlags Flags = Node->getFlags();
8790
8791 unsigned NewOp =
8792 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8793
8794 if (isOperationLegalOrCustom(NewOp, VT)) {
8795 if (!Flags.hasNoNaNs()) {
8796 // Insert canonicalizes if it's possible we need to quiet to get correct
8797 // sNaN behavior.
8798 if (!DAG.isKnownNeverSNaN(LHS)) {
8799 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8800 }
8801 if (!DAG.isKnownNeverSNaN(RHS)) {
8802 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8803 }
8804 }
8805
8806 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8807 }
8808
8809 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8810 // same behaviors for all of other cases: +0.0 vs -0.0 included.
8811 if (Flags.hasNoNaNs() ||
8812 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8813 unsigned IEEE2019Op =
8814 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8815 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8816 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8817 }
8818
8819 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8820 // either one for +0.0 vs -0.0.
8821 if ((Flags.hasNoNaNs() ||
8822 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8823 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8824 DAG.isKnownNeverZeroFloat(RHS))) {
8825 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8826 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8827 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8828 }
8829
8831 return DAG.UnrollVectorOp(Node);
8832
8833 // If only one operand is NaN, override it with another operand.
8834 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8835 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8836 }
8837 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8838 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8839 }
8840
8841 SDValue MinMax =
8842 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8843
8844 // TODO: We need quiet sNaN if strictfp.
8845
8846 // Fixup signed zero behavior.
8847 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8848 DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
8849 return MinMax;
8850 }
8851 SDValue TestZero =
8852 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8853 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8854 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8855 SDValue LCmp = DAG.getSelect(
8856 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8857 MinMax, Flags);
8858 SDValue RCmp = DAG.getSelect(
8859 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8860 Flags);
8861 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8862}
8863
8864/// Returns a true value if if this FPClassTest can be performed with an ordered
8865/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8866/// std::nullopt if it cannot be performed as a compare with 0.
8867static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8868 const fltSemantics &Semantics,
8869 const MachineFunction &MF) {
8870 FPClassTest OrderedMask = Test & ~fcNan;
8871 FPClassTest NanTest = Test & fcNan;
8872 bool IsOrdered = NanTest == fcNone;
8873 bool IsUnordered = NanTest == fcNan;
8874
8875 // Skip cases that are testing for only a qnan or snan.
8876 if (!IsOrdered && !IsUnordered)
8877 return std::nullopt;
8878
8879 if (OrderedMask == fcZero &&
8880 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8881 return IsOrdered;
8882 if (OrderedMask == (fcZero | fcSubnormal) &&
8883 MF.getDenormalMode(Semantics).inputsAreZero())
8884 return IsOrdered;
8885 return std::nullopt;
8886}
8887
8889 const FPClassTest OrigTestMask,
8890 SDNodeFlags Flags, const SDLoc &DL,
8891 SelectionDAG &DAG) const {
8892 EVT OperandVT = Op.getValueType();
8893 assert(OperandVT.isFloatingPoint());
8894 FPClassTest Test = OrigTestMask;
8895
8896 // Degenerated cases.
8897 if (Test == fcNone)
8898 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8899 if (Test == fcAllFlags)
8900 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8901
8902 // PPC double double is a pair of doubles, of which the higher part determines
8903 // the value class.
8904 if (OperandVT == MVT::ppcf128) {
8905 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8906 DAG.getConstant(1, DL, MVT::i32));
8907 OperandVT = MVT::f64;
8908 }
8909
8910 // Floating-point type properties.
8911 EVT ScalarFloatVT = OperandVT.getScalarType();
8912 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8913 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8914 bool IsF80 = (ScalarFloatVT == MVT::f80);
8915
8916 // Some checks can be implemented using float comparisons, if floating point
8917 // exceptions are ignored.
8918 if (Flags.hasNoFPExcept() &&
8920 FPClassTest FPTestMask = Test;
8921 bool IsInvertedFP = false;
8922
8923 if (FPClassTest InvertedFPCheck =
8924 invertFPClassTestIfSimpler(FPTestMask, true)) {
8925 FPTestMask = InvertedFPCheck;
8926 IsInvertedFP = true;
8927 }
8928
8929 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8930 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8931
8932 // See if we can fold an | fcNan into an unordered compare.
8933 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8934
8935 // Can't fold the ordered check if we're only testing for snan or qnan
8936 // individually.
8937 if ((FPTestMask & fcNan) != fcNan)
8938 OrderedFPTestMask = FPTestMask;
8939
8940 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8941
8942 if (std::optional<bool> IsCmp0 =
8943 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8944 IsCmp0 && (isCondCodeLegalOrCustom(
8945 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8946 OperandVT.getScalarType().getSimpleVT()))) {
8947
8948 // If denormals could be implicitly treated as 0, this is not equivalent
8949 // to a compare with 0 since it will also be true for denormals.
8950 return DAG.getSetCC(DL, ResultVT, Op,
8951 DAG.getConstantFP(0.0, DL, OperandVT),
8952 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8953 }
8954
8955 if (FPTestMask == fcNan &&
8957 OperandVT.getScalarType().getSimpleVT()))
8958 return DAG.getSetCC(DL, ResultVT, Op, Op,
8959 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8960
8961 bool IsOrderedInf = FPTestMask == fcInf;
8962 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8963 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8964 : UnorderedCmpOpcode,
8965 OperandVT.getScalarType().getSimpleVT()) &&
8966 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8968 (OperandVT.isVector() &&
8970 // isinf(x) --> fabs(x) == inf
8971 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8972 SDValue Inf =
8973 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8974 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8975 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8976 }
8977
8978 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8979 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8980 : UnorderedCmpOpcode,
8981 OperandVT.getSimpleVT())) {
8982 // isposinf(x) --> x == inf
8983 // isneginf(x) --> x == -inf
8984 // isposinf(x) || nan --> x u== inf
8985 // isneginf(x) || nan --> x u== -inf
8986
8987 SDValue Inf = DAG.getConstantFP(
8988 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
8989 OperandVT);
8990 return DAG.getSetCC(DL, ResultVT, Op, Inf,
8991 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8992 }
8993
8994 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8995 // TODO: Could handle ordered case, but it produces worse code for
8996 // x86. Maybe handle ordered if fabs is free?
8997
8998 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8999 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9000
9001 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9002 OperandVT.getScalarType().getSimpleVT())) {
9003 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9004
9005 // TODO: Maybe only makes sense if fabs is free. Integer test of
9006 // exponent bits seems better for x86.
9007 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9008 SDValue SmallestNormal = DAG.getConstantFP(
9009 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9010 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9011 IsOrdered ? OrderedOp : UnorderedOp);
9012 }
9013 }
9014
9015 if (FPTestMask == fcNormal) {
9016 // TODO: Handle unordered
9017 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9018 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9019
9020 if (isCondCodeLegalOrCustom(IsFiniteOp,
9021 OperandVT.getScalarType().getSimpleVT()) &&
9022 isCondCodeLegalOrCustom(IsNormalOp,
9023 OperandVT.getScalarType().getSimpleVT()) &&
9024 isFAbsFree(OperandVT)) {
9025 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9026 SDValue Inf =
9027 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9028 SDValue SmallestNormal = DAG.getConstantFP(
9029 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9030
9031 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9032 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9033 SDValue IsNormal =
9034 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9035 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9036 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9037 }
9038 }
9039 }
9040
9041 // Some checks may be represented as inversion of simpler check, for example
9042 // "inf|normal|subnormal|zero" => !"nan".
9043 bool IsInverted = false;
9044
9045 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9046 Test = InvertedCheck;
9047 IsInverted = true;
9048 }
9049
9050 // In the general case use integer operations.
9051 unsigned BitSize = OperandVT.getScalarSizeInBits();
9052 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
9053 if (OperandVT.isVector())
9054 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
9055 OperandVT.getVectorElementCount());
9056 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9057
9058 // Various masks.
9059 APInt SignBit = APInt::getSignMask(BitSize);
9060 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9061 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9062 const unsigned ExplicitIntBitInF80 = 63;
9063 APInt ExpMask = Inf;
9064 if (IsF80)
9065 ExpMask.clearBit(ExplicitIntBitInF80);
9066 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9067 APInt QNaNBitMask =
9068 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9069 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9070
9071 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9072 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9073 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9074 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9075 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9076 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9077
9078 SDValue Res;
9079 const auto appendResult = [&](SDValue PartialRes) {
9080 if (PartialRes) {
9081 if (Res)
9082 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9083 else
9084 Res = PartialRes;
9085 }
9086 };
9087
9088 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9089 const auto getIntBitIsSet = [&]() -> SDValue {
9090 if (!IntBitIsSetV) {
9091 APInt IntBitMask(BitSize, 0);
9092 IntBitMask.setBit(ExplicitIntBitInF80);
9093 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9094 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9095 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9096 }
9097 return IntBitIsSetV;
9098 };
9099
9100 // Split the value into sign bit and absolute value.
9101 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9102 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9103 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9104
9105 // Tests that involve more than one class should be processed first.
9106 SDValue PartialRes;
9107
9108 if (IsF80)
9109 ; // Detect finite numbers of f80 by checking individual classes because
9110 // they have different settings of the explicit integer bit.
9111 else if ((Test & fcFinite) == fcFinite) {
9112 // finite(V) ==> abs(V) < exp_mask
9113 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9114 Test &= ~fcFinite;
9115 } else if ((Test & fcFinite) == fcPosFinite) {
9116 // finite(V) && V > 0 ==> V < exp_mask
9117 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9118 Test &= ~fcPosFinite;
9119 } else if ((Test & fcFinite) == fcNegFinite) {
9120 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9121 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9122 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9123 Test &= ~fcNegFinite;
9124 }
9125 appendResult(PartialRes);
9126
9127 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9128 // fcZero | fcSubnormal => test all exponent bits are 0
9129 // TODO: Handle sign bit specific cases
9130 if (PartialCheck == (fcZero | fcSubnormal)) {
9131 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9132 SDValue ExpIsZero =
9133 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9134 appendResult(ExpIsZero);
9135 Test &= ~PartialCheck & fcAllFlags;
9136 }
9137 }
9138
9139 // Check for individual classes.
9140
9141 if (unsigned PartialCheck = Test & fcZero) {
9142 if (PartialCheck == fcPosZero)
9143 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9144 else if (PartialCheck == fcZero)
9145 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9146 else // ISD::fcNegZero
9147 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9148 appendResult(PartialRes);
9149 }
9150
9151 if (unsigned PartialCheck = Test & fcSubnormal) {
9152 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9153 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9154 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9155 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9156 SDValue VMinusOneV =
9157 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9158 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9159 if (PartialCheck == fcNegSubnormal)
9160 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9161 appendResult(PartialRes);
9162 }
9163
9164 if (unsigned PartialCheck = Test & fcInf) {
9165 if (PartialCheck == fcPosInf)
9166 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9167 else if (PartialCheck == fcInf)
9168 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9169 else { // ISD::fcNegInf
9170 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9171 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9172 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9173 }
9174 appendResult(PartialRes);
9175 }
9176
9177 if (unsigned PartialCheck = Test & fcNan) {
9178 APInt InfWithQnanBit = Inf | QNaNBitMask;
9179 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9180 if (PartialCheck == fcNan) {
9181 // isnan(V) ==> abs(V) > int(inf)
9182 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9183 if (IsF80) {
9184 // Recognize unsupported values as NaNs for compatibility with glibc.
9185 // In them (exp(V)==0) == int_bit.
9186 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9187 SDValue ExpIsZero =
9188 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9189 SDValue IsPseudo =
9190 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9191 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9192 }
9193 } else if (PartialCheck == fcQNan) {
9194 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9195 PartialRes =
9196 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9197 } else { // ISD::fcSNan
9198 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9199 // abs(V) < (unsigned(Inf) | quiet_bit)
9200 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9201 SDValue IsNotQnan =
9202 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9203 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9204 }
9205 appendResult(PartialRes);
9206 }
9207
9208 if (unsigned PartialCheck = Test & fcNormal) {
9209 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9210 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9211 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9212 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9213 APInt ExpLimit = ExpMask - ExpLSB;
9214 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9215 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9216 if (PartialCheck == fcNegNormal)
9217 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9218 else if (PartialCheck == fcPosNormal) {
9219 SDValue PosSignV =
9220 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9221 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9222 }
9223 if (IsF80)
9224 PartialRes =
9225 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9226 appendResult(PartialRes);
9227 }
9228
9229 if (!Res)
9230 return DAG.getConstant(IsInverted, DL, ResultVT);
9231 if (IsInverted)
9232 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9233 return Res;
9234}
9235
9236// Only expand vector types if we have the appropriate vector bit operations.
9237static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9238 assert(VT.isVector() && "Expected vector type");
9239 unsigned Len = VT.getScalarSizeInBits();
9240 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9243 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9245}
9246
9248 SDLoc dl(Node);
9249 EVT VT = Node->getValueType(0);
9250 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9251 SDValue Op = Node->getOperand(0);
9252 unsigned Len = VT.getScalarSizeInBits();
9253 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9254
9255 // TODO: Add support for irregular type lengths.
9256 if (!(Len <= 128 && Len % 8 == 0))
9257 return SDValue();
9258
9259 // Only expand vector types if we have the appropriate vector bit operations.
9260 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9261 return SDValue();
9262
9263 // This is the "best" algorithm from
9264 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9265 SDValue Mask55 =
9266 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9267 SDValue Mask33 =
9268 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9269 SDValue Mask0F =
9270 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9271
9272 // v = v - ((v >> 1) & 0x55555555...)
9273 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9274 DAG.getNode(ISD::AND, dl, VT,
9275 DAG.getNode(ISD::SRL, dl, VT, Op,
9276 DAG.getConstant(1, dl, ShVT)),
9277 Mask55));
9278 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9279 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9280 DAG.getNode(ISD::AND, dl, VT,
9281 DAG.getNode(ISD::SRL, dl, VT, Op,
9282 DAG.getConstant(2, dl, ShVT)),
9283 Mask33));
9284 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9285 Op = DAG.getNode(ISD::AND, dl, VT,
9286 DAG.getNode(ISD::ADD, dl, VT, Op,
9287 DAG.getNode(ISD::SRL, dl, VT, Op,
9288 DAG.getConstant(4, dl, ShVT))),
9289 Mask0F);
9290
9291 if (Len <= 8)
9292 return Op;
9293
9294 // Avoid the multiply if we only have 2 bytes to add.
9295 // TODO: Only doing this for scalars because vectors weren't as obviously
9296 // improved.
9297 if (Len == 16 && !VT.isVector()) {
9298 // v = (v + (v >> 8)) & 0x00FF;
9299 return DAG.getNode(ISD::AND, dl, VT,
9300 DAG.getNode(ISD::ADD, dl, VT, Op,
9301 DAG.getNode(ISD::SRL, dl, VT, Op,
9302 DAG.getConstant(8, dl, ShVT))),
9303 DAG.getConstant(0xFF, dl, VT));
9304 }
9305
9306 // v = (v * 0x01010101...) >> (Len - 8)
9307 SDValue V;
9310 SDValue Mask01 =
9311 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9312 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9313 } else {
9314 V = Op;
9315 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9316 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9317 V = DAG.getNode(ISD::ADD, dl, VT, V,
9318 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9319 }
9320 }
9321 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9322}
9323
9325 SDLoc dl(Node);
9326 EVT VT = Node->getValueType(0);
9327 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9328 SDValue Op = Node->getOperand(0);
9329 SDValue Mask = Node->getOperand(1);
9330 SDValue VL = Node->getOperand(2);
9331 unsigned Len = VT.getScalarSizeInBits();
9332 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9333
9334 // TODO: Add support for irregular type lengths.
9335 if (!(Len <= 128 && Len % 8 == 0))
9336 return SDValue();
9337
9338 // This is same algorithm of expandCTPOP from
9339 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9340 SDValue Mask55 =
9341 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9342 SDValue Mask33 =
9343 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9344 SDValue Mask0F =
9345 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9346
9347 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9348
9349 // v = v - ((v >> 1) & 0x55555555...)
9350 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9351 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9352 DAG.getConstant(1, dl, ShVT), Mask, VL),
9353 Mask55, Mask, VL);
9354 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9355
9356 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9357 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9358 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9359 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9360 DAG.getConstant(2, dl, ShVT), Mask, VL),
9361 Mask33, Mask, VL);
9362 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9363
9364 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9365 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9366 Mask, VL),
9367 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9368 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9369
9370 if (Len <= 8)
9371 return Op;
9372
9373 // v = (v * 0x01010101...) >> (Len - 8)
9374 SDValue V;
9376 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9377 SDValue Mask01 =
9378 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9379 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9380 } else {
9381 V = Op;
9382 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9383 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9384 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9385 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9386 Mask, VL);
9387 }
9388 }
9389 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9390 Mask, VL);
9391}
9392
9394 SDLoc dl(Node);
9395 EVT VT = Node->getValueType(0);
9396 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9397 SDValue Op = Node->getOperand(0);
9398 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9399
9400 // If the non-ZERO_UNDEF version is supported we can use that instead.
9401 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9403 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9404
9405 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9407 EVT SetCCVT =
9408 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9409 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9410 SDValue Zero = DAG.getConstant(0, dl, VT);
9411 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9412 return DAG.getSelect(dl, VT, SrcIsZero,
9413 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9414 }
9415
9416 // Only expand vector types if we have the appropriate vector bit operations.
9417 // This includes the operations needed to expand CTPOP if it isn't supported.
9418 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9420 !canExpandVectorCTPOP(*this, VT)) ||
9423 return SDValue();
9424
9425 // for now, we do this:
9426 // x = x | (x >> 1);
9427 // x = x | (x >> 2);
9428 // ...
9429 // x = x | (x >>16);
9430 // x = x | (x >>32); // for 64-bit input
9431 // return popcount(~x);
9432 //
9433 // Ref: "Hacker's Delight" by Henry Warren
9434 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9435 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9436 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9437 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9438 }
9439 Op = DAG.getNOT(dl, Op, VT);
9440 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9441}
9442
9444 SDLoc dl(Node);
9445 EVT VT = Node->getValueType(0);
9446 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9447 SDValue Op = Node->getOperand(0);
9448 SDValue Mask = Node->getOperand(1);
9449 SDValue VL = Node->getOperand(2);
9450 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9451
9452 // do this:
9453 // x = x | (x >> 1);
9454 // x = x | (x >> 2);
9455 // ...
9456 // x = x | (x >>16);
9457 // x = x | (x >>32); // for 64-bit input
9458 // return popcount(~x);
9459 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9460 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9461 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9462 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9463 VL);
9464 }
9465 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9466 Mask, VL);
9467 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9468}
9469
9471 const SDLoc &DL, EVT VT, SDValue Op,
9472 unsigned BitWidth) const {
9473 if (BitWidth != 32 && BitWidth != 64)
9474 return SDValue();
9475 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9476 : APInt(64, 0x0218A392CD3D5DBFULL);
9477 const DataLayout &TD = DAG.getDataLayout();
9478 MachinePointerInfo PtrInfo =
9480 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9481 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9482 SDValue Lookup = DAG.getNode(
9483 ISD::SRL, DL, VT,
9484 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9485 DAG.getConstant(DeBruijn, DL, VT)),
9486 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9488
9490 for (unsigned i = 0; i < BitWidth; i++) {
9491 APInt Shl = DeBruijn.shl(i);
9492 APInt Lshr = Shl.lshr(ShiftAmt);
9493 Table[Lshr.getZExtValue()] = i;
9494 }
9495
9496 // Create a ConstantArray in Constant Pool
9497 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9498 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9499 TD.getPrefTypeAlign(CA->getType()));
9500 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9501 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9502 PtrInfo, MVT::i8);
9503 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9504 return ExtLoad;
9505
9506 EVT SetCCVT =
9507 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9508 SDValue Zero = DAG.getConstant(0, DL, VT);
9509 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9510 return DAG.getSelect(DL, VT, SrcIsZero,
9511 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9512}
9513
9515 SDLoc dl(Node);
9516 EVT VT = Node->getValueType(0);
9517 SDValue Op = Node->getOperand(0);
9518 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9519
9520 // If the non-ZERO_UNDEF version is supported we can use that instead.
9521 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9523 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9524
9525 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9527 EVT SetCCVT =
9528 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9529 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9530 SDValue Zero = DAG.getConstant(0, dl, VT);
9531 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9532 return DAG.getSelect(dl, VT, SrcIsZero,
9533 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9534 }
9535
9536 // Only expand vector types if we have the appropriate vector bit operations.
9537 // This includes the operations needed to expand CTPOP if it isn't supported.
9538 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9541 !canExpandVectorCTPOP(*this, VT)) ||
9545 return SDValue();
9546
9547 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9548 // to be expanded or converted to a libcall.
9551 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9552 return V;
9553
9554 // for now, we use: { return popcount(~x & (x - 1)); }
9555 // unless the target has ctlz but not ctpop, in which case we use:
9556 // { return 32 - nlz(~x & (x-1)); }
9557 // Ref: "Hacker's Delight" by Henry Warren
9558 SDValue Tmp = DAG.getNode(
9559 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9560 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9561
9562 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9564 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9565 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9566 }
9567
9568 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9569}
9570
9572 SDValue Op = Node->getOperand(0);
9573 SDValue Mask = Node->getOperand(1);
9574 SDValue VL = Node->getOperand(2);
9575 SDLoc dl(Node);
9576 EVT VT = Node->getValueType(0);
9577
9578 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9579 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9580 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9581 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9582 DAG.getConstant(1, dl, VT), Mask, VL);
9583 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9584 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9585}
9586
9588 SelectionDAG &DAG) const {
9589 // %cond = to_bool_vec %source
9590 // %splat = splat /*val=*/VL
9591 // %tz = step_vector
9592 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9593 // %r = vp.reduce.umin %v
9594 SDLoc DL(N);
9595 SDValue Source = N->getOperand(0);
9596 SDValue Mask = N->getOperand(1);
9597 SDValue EVL = N->getOperand(2);
9598 EVT SrcVT = Source.getValueType();
9599 EVT ResVT = N->getValueType(0);
9600 EVT ResVecVT =
9601 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9602
9603 // Convert to boolean vector.
9604 if (SrcVT.getScalarType() != MVT::i1) {
9605 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9606 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9607 SrcVT.getVectorElementCount());
9608 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9609 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9610 }
9611
9612 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9613 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9614 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9615 SDValue Select =
9616 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9617 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9618}
9619
9621 SelectionDAG &DAG) const {
9622 SDLoc DL(N);
9623 SDValue Mask = N->getOperand(0);
9624 EVT MaskVT = Mask.getValueType();
9625 EVT BoolVT = MaskVT.getScalarType();
9626
9627 // Find a suitable type for a stepvector.
9628 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9629 if (MaskVT.isScalableVector())
9630 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9631 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9632 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9633 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9634 /*ZeroIsPoison=*/true, &VScaleRange);
9635 EVT StepVT = MVT::getIntegerVT(EltWidth);
9636 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9637
9638 // If promotion is required to make the type legal, do it here; promotion
9639 // of integers within LegalizeVectorOps is looking for types of the same
9640 // size but with a smaller number of larger elements, not the usual larger
9641 // size with the same number of larger elements.
9642 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9644 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9645 StepVT = StepVecVT.getVectorElementType();
9646 }
9647
9648 // Zero out lanes with inactive elements, then find the highest remaining
9649 // value from the stepvector.
9650 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9651 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9652 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9653 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9654 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9655}
9656
9658 bool IsNegative) const {
9659 SDLoc dl(N);
9660 EVT VT = N->getValueType(0);
9661 SDValue Op = N->getOperand(0);
9662
9663 // abs(x) -> smax(x,sub(0,x))
9664 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9666 SDValue Zero = DAG.getConstant(0, dl, VT);
9667 Op = DAG.getFreeze(Op);
9668 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9669 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9670 }
9671
9672 // abs(x) -> umin(x,sub(0,x))
9673 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9675 SDValue Zero = DAG.getConstant(0, dl, VT);
9676 Op = DAG.getFreeze(Op);
9677 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9678 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9679 }
9680
9681 // 0 - abs(x) -> smin(x, sub(0,x))
9682 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9684 SDValue Zero = DAG.getConstant(0, dl, VT);
9685 Op = DAG.getFreeze(Op);
9686 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9687 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9688 }
9689
9690 // Only expand vector types if we have the appropriate vector operations.
9691 if (VT.isVector() &&
9693 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9694 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9696 return SDValue();
9697
9698 Op = DAG.getFreeze(Op);
9699 SDValue Shift = DAG.getNode(
9700 ISD::SRA, dl, VT, Op,
9701 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9702 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9703
9704 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9705 if (!IsNegative)
9706 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9707
9708 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9709 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9710}
9711
// Expand an ISD::ABDS/ABDU (absolute-difference) node into operations the
// target supports. Strategies are tried cheapest-first:
//   1. sub(max(l,r), min(l,r)) when both min and max are legal.
//   2. (unsigned only) or(usubsat(l,r), usubsat(r,l)) when USUBSAT is legal.
//   3. abs(sub(...)) when the subtract provably cannot overflow.
//   4. A branchless setcc/xor/sub form when the setcc result type matches VT
//      and booleans are all-ones.
//   5. (unsigned, illegal scalar type) a USUBO-overflow-flag based form.
//   6. A generic select of the two possible subtractions.
// NOTE(review): the function's signature line (TargetLowering::expandABD)
// was dropped from this rendering; the code below is left byte-identical.
9713 SDLoc dl(N);
9714 EVT VT = N->getValueType(0);
9715 SDValue LHS = N->getOperand(0);
9716 SDValue RHS = N->getOperand(1);
9717 bool IsSigned = N->getOpcode() == ISD::ABDS;
9718
9719 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9720 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9721 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9722 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9723 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Freeze both operands so the two uses below observe the same value even if
// the input is poison/undef.
9724 LHS = DAG.getFreeze(LHS);
9725 RHS = DAG.getFreeze(RHS);
9726 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9727 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9728 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9729 }
9730
9731 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9732 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9733 LHS = DAG.getFreeze(LHS);
9734 RHS = DAG.getFreeze(RHS);
// One of the two saturating subtracts clamps to zero, so OR-ing them yields
// the absolute difference.
9735 return DAG.getNode(ISD::OR, dl, VT,
9736 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9737 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9738 }
9739
9740 // If the subtract doesn't overflow then just use abs(sub())
9741 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9742
9743 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9744 return DAG.getNode(ISD::ABS, dl, VT,
9745 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9746
9747 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9748 return DAG.getNode(ISD::ABS, dl, VT,
9749 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9750
9751 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): the line computing the ISD::CondCode `CC` used by getSetCC
// below (presumably SETGT for signed / SETUGT for unsigned) was dropped from
// this rendering — confirm against upstream.
9753 LHS = DAG.getFreeze(LHS);
9754 RHS = DAG.getFreeze(RHS);
9755 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9756
9757 // Branchless expansion iff cmp result is allbits:
9758 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9759 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9760 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9761 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9762 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9763 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9764 }
9765
9766 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9767 // flag if the (scalar) type is illegal as this is more likely to legalize
9768 // cleanly:
9769 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9770 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9771 SDValue USubO =
9772 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9773 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9774 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9775 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9776 }
9777
9778 // FIXME: Should really try to split the vector in case it's legal on a
9779 // subvector.
// NOTE(review): the guarding `if` condition for this vector-unroll fallback
// (original line 9780) was dropped from this rendering — confirm against
// upstream before treating the unroll as unconditional.
9781 return DAG.UnrollVectorOp(N);
9782
9783 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9784 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9785 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9786 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9787}
9788
// Expand an AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU (integer averaging) node.
// Strategies, in order:
//   1. If both operands have at least one spare top bit (sign- or
//      zero-extended), use add (+1 for ceil) then a single shift.
//   2. For scalars with a legal double-width type, extend, add, shift, trunc.
//   3. AVGFLOORU on an illegal scalar type: UADDO and fold the carry back in
//      as the top bit of the shifted sum.
//   4. Generic overflow-free bit trick: sign/and-or of the operands combined
//      with a shifted xor.
// NOTE(review): the function's signature line (TargetLowering::expandAVG)
// was dropped from this rendering; the code below is left byte-identical.
9790 SDLoc dl(N);
9791 EVT VT = N->getValueType(0);
9792 SDValue LHS = N->getOperand(0);
9793 SDValue RHS = N->getOperand(1);
9794
9795 unsigned Opc = N->getOpcode();
9796 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9797 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9798 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9799 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9800 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9801 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// NOTE(review): the opening line of this assert (original line 9802, listing
// the first opcodes being checked) was dropped from this rendering.
9803 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9804 "Unknown AVG node");
9805
9806 // If the operands are already extended, we can add+shift.
// Signed needs >= 2 sign bits (one spare bit); unsigned needs >= 1 leading
// zero, so the add below cannot overflow.
9807 bool IsExt =
9808 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9809 DAG.ComputeNumSignBits(RHS) >= 2) ||
9810 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9811 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9812 if (IsExt) {
9813 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9814 if (!IsFloor)
9815 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9816 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9817 DAG.getShiftAmountConstant(1, VT, dl));
9818 }
9819
9820 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9821 if (VT.isScalarInteger()) {
9822 unsigned BW = VT.getScalarSizeInBits();
9823 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9824 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9825 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9826 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9827 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9828 if (!IsFloor)
9829 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9830 DAG.getConstant(1, dl, ExtVT));
9831 // Just use SRL as we will be truncating away the extended sign bits.
9832 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9833 DAG.getShiftAmountConstant(1, ExtVT, dl));
9834 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9835 }
9836 }
9837
9838 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9839 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9840 SDValue UAddWithOverflow =
9841 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9842
9843 SDValue Sum = UAddWithOverflow.getValue(0);
9844 SDValue Overflow = UAddWithOverflow.getValue(1);
9845
9846 // Right shift the sum by 1
9847 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9848 DAG.getShiftAmountConstant(1, VT, dl));
9849
9850 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9851 SDValue OverflowShl = DAG.getNode(
9852 ISD::SHL, dl, VT, ZeroExtOverflow,
9853 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9854
9855 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9856 }
9857
9858 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9859 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9860 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9861 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Freeze so the multiple uses of each operand see a consistent value.
9862 LHS = DAG.getFreeze(LHS);
9863 RHS = DAG.getFreeze(RHS);
9864 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9865 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9866 SDValue Shift =
9867 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9868 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9869}
9870
// Expand an ISD::BSWAP (byte-swap) node into shifts, masks and ORs.
// Handles i16 (single rotate), i32 and i64 scalar element types; any other
// type returns an empty SDValue so the caller falls back to another strategy.
// NOTE(review): the function's signature line (TargetLowering::expandBSWAP)
// was dropped from this rendering; the code below is left byte-identical.
9872 SDLoc dl(N);
9873 EVT VT = N->getValueType(0);
9874 SDValue Op = N->getOperand(0);
9875
9876 if (!VT.isSimple())
9877 return SDValue();
9878
9879 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9880 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9881 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9882 default:
9883 return SDValue();
9884 case MVT::i16:
9885 // Use a rotate by 8. This can be further expanded if necessary.
9886 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9887 case MVT::i32:
// Each TmpN isolates one byte in its destination position; the OR tree at
// the end combines the four bytes.
9888 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9889 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9890 DAG.getConstant(0xFF00, dl, VT));
9891 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9892 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9893 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9894 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9895 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9896 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9897 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9898 case MVT::i64:
// Same scheme with eight bytes: mask+shift each byte into place, then
// combine with a balanced OR tree (pairs, then quads, then the full value).
9899 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9900 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9901 DAG.getConstant(255ULL<<8, dl, VT));
9902 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9903 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9904 DAG.getConstant(255ULL<<16, dl, VT));
9905 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9906 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9907 DAG.getConstant(255ULL<<24, dl, VT));
9908 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9909 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9910 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9911 DAG.getConstant(255ULL<<24, dl, VT));
9912 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9913 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9914 DAG.getConstant(255ULL<<16, dl, VT));
9915 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9916 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9917 DAG.getConstant(255ULL<<8, dl, VT));
9918 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9919 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9920 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9921 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9922 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9923 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9924 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9925 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9926 }
9927}
9928
// Expand a VP (vector-predicated) BSWAP node. Mirrors the non-VP expansion
// above, but every arithmetic node carries the Mask and EVL (explicit vector
// length) operands so lanes beyond EVL / with a false mask stay inactive.
// NOTE(review): the function's signature line (TargetLowering::expandVPBSWAP)
// was dropped from this rendering; the code below is left byte-identical.
9930 SDLoc dl(N);
9931 EVT VT = N->getValueType(0);
9932 SDValue Op = N->getOperand(0);
9933 SDValue Mask = N->getOperand(1);
9934 SDValue EVL = N->getOperand(2);
9935
9936 if (!VT.isSimple())
9937 return SDValue();
9938
9939 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9940 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9941 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9942 default:
9943 return SDValue();
9944 case MVT::i16:
// No VP rotate is used here; build the swap from a shift pair plus OR.
9945 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9946 Mask, EVL);
9947 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9948 Mask, EVL);
9949 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9950 case MVT::i32:
9951 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9952 Mask, EVL);
9953 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9954 Mask, EVL);
9955 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9956 Mask, EVL);
9957 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9958 Mask, EVL);
9959 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9960 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9961 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9962 Mask, EVL);
9963 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9964 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9965 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9966 case MVT::i64:
// Eight bytes: isolate each byte in its destination position, then combine
// with a balanced VP_OR tree.
9967 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9968 Mask, EVL);
9969 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9970 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9971 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9972 Mask, EVL);
9973 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9974 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9975 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9976 Mask, EVL);
9977 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9978 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9979 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9980 Mask, EVL);
9981 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9982 Mask, EVL);
9983 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9984 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9985 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9986 Mask, EVL);
9987 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9988 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9989 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9990 Mask, EVL);
9991 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9992 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9993 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9994 Mask, EVL);
9995 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9996 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9997 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9998 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9999 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10000 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10001 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10002 }
10003}
10004
// Expand an ISD::BITREVERSE node. For power-of-two sizes of at least a byte,
// byte-swap first and then swap nibbles, bit-pairs, and single bits with
// repeated mask/shift/or steps (3 rounds). For other sizes, fall back to
// moving each bit individually (O(Sz) nodes).
// NOTE(review): the function's signature line
// (TargetLowering::expandBITREVERSE) was dropped from this rendering; the
// code below is left byte-identical.
10006 SDLoc dl(N);
10007 EVT VT = N->getValueType(0);
10008 SDValue Op = N->getOperand(0);
10009 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10010 unsigned Sz = VT.getScalarSizeInBits();
10011
10012 SDValue Tmp, Tmp2, Tmp3;
10013
10014 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10015 // and finally the i1 pairs.
10016 // TODO: We can easily support i4/i2 legal types if any target ever does.
10017 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10018 // Create the masks - repeating the pattern every byte.
10019 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10020 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10021 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10022
10023 // BSWAP if the type is wider than a single byte.
10024 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10025
10026 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10027 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10028 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10029 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10030 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10031 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10032
10033 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10034 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10035 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10036 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10037 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10038 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10039
10040 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10041 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10042 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10043 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10044 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10045 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10046 return Tmp;
10047 }
10048
// Fallback: bit I of the input moves to bit J = Sz-1-I of the result; shift
// the whole value so bit I lands at J, mask out everything else, and OR
// into the accumulator.
10049 Tmp = DAG.getConstant(0, dl, VT);
10050 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10051 if (I < J)
10052 Tmp2 =
10053 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10054 else
10055 Tmp2 =
10056 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10057
10058 APInt Shift = APInt::getOneBitSet(Sz, J);
10059 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10060 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10061 }
10062
10063 return Tmp;
10064}
10065
// Expand a VP (vector-predicated) BITREVERSE node. Mirrors the power-of-two
// path of the non-VP expansion (byte-swap, then nibble/pair/bit swaps) with
// Mask and EVL threaded through every node. Unlike the non-VP version there
// is no per-bit fallback: non-power-of-two / sub-byte sizes return an empty
// SDValue.
// NOTE(review): the function's signature line
// (TargetLowering::expandVPBITREVERSE) was dropped from this rendering; the
// code below is left byte-identical.
10067 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10068
10069 SDLoc dl(N);
10070 EVT VT = N->getValueType(0);
10071 SDValue Op = N->getOperand(0);
10072 SDValue Mask = N->getOperand(1);
10073 SDValue EVL = N->getOperand(2);
10074 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10075 unsigned Sz = VT.getScalarSizeInBits();
10076
10077 SDValue Tmp, Tmp2, Tmp3;
10078
10079 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10080 // and finally the i1 pairs.
10081 // TODO: We can easily support i4/i2 legal types if any target ever does.
10082 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10083 // Create the masks - repeating the pattern every byte.
10084 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10085 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10086 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10087
10088 // BSWAP if the type is wider than a single byte.
10089 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10090
10091 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10092 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10093 Mask, EVL);
10094 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10095 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10096 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10097 Mask, EVL);
10098 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10099 Mask, EVL);
10100 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10101
10102 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10103 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10104 Mask, EVL);
10105 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10106 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10107 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10108 Mask, EVL);
10109 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10110 Mask, EVL);
10111 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10112
10113 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10114 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10115 Mask, EVL);
10116 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10117 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10118 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10119 Mask, EVL);
10120 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10121 Mask, EVL);
10122 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10123 return Tmp;
10124 }
10125 return SDValue();
10126}
10127
// Scalarize a vector load into per-element loads (or, for non-byte-sized
// elements, one wide integer load followed by shift/mask extraction of each
// element). Returns {built vector value, output chain}.
// NOTE(review): the line naming the function
// (TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,) was dropped from
// this rendering; the code below is left byte-identical.
10128std::pair<SDValue, SDValue>
10130 SelectionDAG &DAG) const {
10131 SDLoc SL(LD);
10132 SDValue Chain = LD->getChain();
10133 SDValue BasePTR = LD->getBasePtr();
10134 EVT SrcVT = LD->getMemoryVT();
10135 EVT DstVT = LD->getValueType(0);
10136 ISD::LoadExtType ExtType = LD->getExtensionType();
10137
10138 if (SrcVT.isScalableVector())
10139 report_fatal_error("Cannot scalarize scalable vector loads");
10140
10141 unsigned NumElem = SrcVT.getVectorNumElements();
10142
10143 EVT SrcEltVT = SrcVT.getScalarType();
10144 EVT DstEltVT = DstVT.getScalarType();
10145
10146 // A vector must always be stored in memory as-is, i.e. without any padding
10147 // between the elements, since various code depend on it, e.g. in the
10148 // handling of a bitcast of a vector type to int, which may be done with a
10149 // vector store followed by an integer load. A vector that does not have
10150 // elements that are byte-sized must therefore be stored as an integer
10151 // built out of the extracted vector elements.
10152 if (!SrcEltVT.isByteSized()) {
10153 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10154 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10155
10156 unsigned NumSrcBits = SrcVT.getSizeInBits();
10157 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10158
10159 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10160 SDValue SrcEltBitMask = DAG.getConstant(
10161 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10162
10163 // Load the whole vector and avoid masking off the top bits as it makes
10164 // the codegen worse.
10165 SDValue Load =
10166 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10167 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10168 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10169
// NOTE(review): the declaration of the `Vals` vector (presumably
// SmallVector<SDValue, 8> Vals;) was dropped from this rendering.
10171 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Big-endian targets keep element 0 in the high bits, so mirror the index.
10172 unsigned ShiftIntoIdx =
10173 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10174 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10175 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10176 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10177 SDValue Elt =
10178 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10179 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10180
10181 if (ExtType != ISD::NON_EXTLOAD) {
10182 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10183 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10184 }
10185
10186 Vals.push_back(Scalar);
10187 }
10188
10189 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10190 return std::make_pair(Value, Load.getValue(1));
10191 }
10192
10193 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10194 assert(SrcEltVT.isByteSized());
10195
// NOTE(review): the declaration of the `Vals` vector (presumably
// SmallVector<SDValue, 8> Vals;) was dropped from this rendering.
10197 SmallVector<SDValue, 8> LoadChains;
10198
// Byte-sized elements: emit one extending scalar load per element, walking
// the base pointer forward by the element stride.
10199 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10200 SDValue ScalarLoad = DAG.getExtLoad(
10201 ExtType, SL, DstEltVT, Chain, BasePTR,
10202 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10203 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10204
10205 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10206
10207 Vals.push_back(ScalarLoad.getValue(0));
10208 LoadChains.push_back(ScalarLoad.getValue(1));
10209 }
10210
// Merge the per-element load chains into a single output chain.
10211 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10212 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10213
10214 return std::make_pair(Value, NewChain);
10215}
10216
// Scalarize a vector store into per-element (possibly truncating) stores,
// or — for non-byte-sized elements — pack all elements into one integer via
// shift/or and emit a single integer store. Returns the resulting chain.
// NOTE(review): the first line of the signature
// (SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,) was
// dropped from this rendering; the code below is left byte-identical.
10218 SelectionDAG &DAG) const {
10219 SDLoc SL(ST);
10220
10221 SDValue Chain = ST->getChain();
10222 SDValue BasePtr = ST->getBasePtr();
10223 SDValue Value = ST->getValue();
10224 EVT StVT = ST->getMemoryVT();
10225
10226 if (StVT.isScalableVector())
10227 report_fatal_error("Cannot scalarize scalable vector stores");
10228
10229 // The type of the data we want to save
10230 EVT RegVT = Value.getValueType();
10231 EVT RegSclVT = RegVT.getScalarType();
10232
10233 // The type of data as saved in memory.
10234 EVT MemSclVT = StVT.getScalarType();
10235
10236 unsigned NumElem = StVT.getVectorNumElements();
10237
10238 // A vector must always be stored in memory as-is, i.e. without any padding
10239 // between the elements, since various code depend on it, e.g. in the
10240 // handling of a bitcast of a vector type to int, which may be done with a
10241 // vector store followed by an integer load. A vector that does not have
10242 // elements that are byte-sized must therefore be stored as an integer
10243 // built out of the extracted vector elements.
10244 if (!MemSclVT.isByteSized()) {
10245 unsigned NumBits = StVT.getSizeInBits();
10246 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10247
10248 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10249
// Pack each truncated element into its bit position within one integer.
10250 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10251 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10252 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10253 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// Big-endian targets keep element 0 in the high bits, so mirror the index.
10254 unsigned ShiftIntoIdx =
10255 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10256 SDValue ShiftAmount =
10257 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10258 SDValue ShiftedElt =
10259 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10260 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10261 }
10262
10263 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10264 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10265 ST->getAAInfo());
10266 }
10267
10268 // Store Stride in bytes
10269 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10270 assert(Stride && "Zero stride!");
10271 // Extract each of the elements from the original vector and save them into
10272 // memory individually.
// NOTE(review): the declaration of the `Stores` vector (presumably
// SmallVector<SDValue, 8> Stores;) was dropped from this rendering.
10274 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10275 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10276
10277 SDValue Ptr =
10278 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10279
10280 // This scalar TruncStore may be illegal, but we legalize it later.
10281 SDValue Store = DAG.getTruncStore(
10282 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10283 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10284 ST->getAAInfo());
10285
10286 Stores.push_back(Store);
10287 }
10288
// Combine the independent per-element store chains.
10289 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10290}
10291
// Expand a load whose natural alignment the target cannot honor.
// FP/vector types: reload as an equally-sized (misaligned) integer and
// bitcast, or — when that integer type is not legal — bounce through an
// aligned stack slot using register-width integer copies. Integer types:
// split into two half-width loads combined with shift+or.
// Returns {loaded value, output chain}.
// NOTE(review): the line naming the function
// (TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG)
// const {) was dropped from this rendering; code below is byte-identical.
10292std::pair<SDValue, SDValue>
10294 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10295 "unaligned indexed loads not implemented!");
10296 SDValue Chain = LD->getChain();
10297 SDValue Ptr = LD->getBasePtr();
10298 EVT VT = LD->getValueType(0);
10299 EVT LoadedVT = LD->getMemoryVT();
10300 SDLoc dl(LD);
10301 auto &MF = DAG.getMachineFunction();
10302
10303 if (VT.isFloatingPoint() || VT.isVector()) {
10304 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10305 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10306 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10307 LoadedVT.isVector()) {
10308 // Scalarize the load and let the individual components be handled.
10309 return scalarizeVectorLoad(LD, DAG);
10310 }
10311
10312 // Expand to a (misaligned) integer load of the same size,
10313 // then bitconvert to floating point or vector.
10314 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10315 LD->getMemOperand());
10316 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10317 if (LoadedVT != VT)
10318 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10319 ISD::ANY_EXTEND, dl, VT, Result);
10320
10321 return std::make_pair(Result, newLoad.getValue(1));
10322 }
10323
10324 // Copy the value to a (aligned) stack slot using (unaligned) integer
10325 // loads and stores, then do a (aligned) load from the stack slot.
10326 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10327 unsigned LoadedBytes = LoadedVT.getStoreSize();
10328 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Ceiling division: the final (possibly partial) register is handled by the
// extending load after the loop.
10329 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10330
10331 // Make sure the stack slot is also aligned for the register type.
10332 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10333 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
// NOTE(review): the declaration of the `Stores` vector (presumably
// SmallVector<SDValue, 8> Stores;) was dropped from this rendering.
10335 SDValue StackPtr = StackBase;
10336 unsigned Offset = 0;
10337
10338 EVT PtrVT = Ptr.getValueType();
10339 EVT StackPtrVT = StackPtr.getValueType();
10340
10341 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10342 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10343
10344 // Do all but one copies using the full register width.
10345 for (unsigned i = 1; i < NumRegs; i++) {
10346 // Load one integer register's worth from the original location.
10347 SDValue Load = DAG.getLoad(
10348 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10349 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10350 // Follow the load with a store to the stack slot. Remember the store.
10351 Stores.push_back(DAG.getStore(
10352 Load.getValue(1), dl, Load, StackPtr,
10353 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10354 // Increment the pointers.
10355 Offset += RegBytes;
10356
10357 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10358 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10359 }
10360
10361 // The last copy may be partial. Do an extending load.
10362 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10363 8 * (LoadedBytes - Offset));
10364 SDValue Load = DAG.getExtLoad(
10365 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10366 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10367 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10368 // Follow the load with a store to the stack slot. Remember the store.
10369 // On big-endian machines this requires a truncating store to ensure
10370 // that the bits end up in the right place.
10371 Stores.push_back(DAG.getTruncStore(
10372 Load.getValue(1), dl, Load, StackPtr,
10373 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10374
10375 // The order of the stores doesn't matter - say it with a TokenFactor.
10376 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10377
10378 // Finally, perform the original load only redirected to the stack slot.
10379 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10380 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10381 LoadedVT);
10382
10383 // Callers expect a MERGE_VALUES node.
10384 return std::make_pair(Load, TF);
10385 }
10386
10387 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10388 "Unaligned load of unsupported type.");
10389
10390 // Compute the new VT that is half the size of the old one. This is an
10391 // integer MVT.
10392 unsigned NumBits = LoadedVT.getSizeInBits();
10393 EVT NewLoadedVT;
10394 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10395 NumBits >>= 1;
10396
10397 Align Alignment = LD->getBaseAlign();
10398 unsigned IncrementSize = NumBits / 8;
10399 ISD::LoadExtType HiExtType = LD->getExtensionType();
10400
10401 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10402 if (HiExtType == ISD::NON_EXTLOAD)
10403 HiExtType = ISD::ZEXTLOAD;
10404
10405 // Load the value in two parts
// The low half is always zero-extended so the final OR does not see stray
// high bits; the high half keeps the original extension kind.
10406 SDValue Lo, Hi;
10407 if (DAG.getDataLayout().isLittleEndian()) {
10408 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10409 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10410 LD->getAAInfo());
10411
10412 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10413 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10414 LD->getPointerInfo().getWithOffset(IncrementSize),
10415 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10416 LD->getAAInfo());
10417 } else {
10418 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10419 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10420 LD->getAAInfo());
10421
10422 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10423 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10424 LD->getPointerInfo().getWithOffset(IncrementSize),
10425 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10426 LD->getAAInfo());
10427 }
10428
10429 // aggregate the two parts
10430 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10431 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10432 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10433
10434 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10435 Hi.getValue(1));
10436
10437 return std::make_pair(Result, TF);
10438}
10439
10441 SelectionDAG &DAG) const {
10442 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10443 "unaligned indexed stores not implemented!");
10444 SDValue Chain = ST->getChain();
10445 SDValue Ptr = ST->getBasePtr();
10446 SDValue Val = ST->getValue();
10447 EVT VT = Val.getValueType();
10448 Align Alignment = ST->getBaseAlign();
10449 auto &MF = DAG.getMachineFunction();
10450 EVT StoreMemVT = ST->getMemoryVT();
10451
10452 SDLoc dl(ST);
10453 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10454 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10455 if (isTypeLegal(intVT)) {
10456 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10457 StoreMemVT.isVector()) {
10458 // Scalarize the store and let the individual components be handled.
10459 SDValue Result = scalarizeVectorStore(ST, DAG);
10460 return Result;
10461 }
10462 // Expand to a bitconvert of the value to the integer type of the
10463 // same size, then a (misaligned) int store.
10464 // FIXME: Does not handle truncating floating point stores!
10465 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10466 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10467 Alignment, ST->getMemOperand()->getFlags());
10468 return Result;
10469 }
10470 // Do a (aligned) store to a stack slot, then copy from the stack slot
10471 // to the final destination using (unaligned) integer loads and stores.
10472 MVT RegVT = getRegisterType(
10473 *DAG.getContext(),
10474 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10475 EVT PtrVT = Ptr.getValueType();
10476 unsigned StoredBytes = StoreMemVT.getStoreSize();
10477 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10478 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10479
10480 // Make sure the stack slot is also aligned for the register type.
10481 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10482 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10483
10484 // Perform the original store, only redirected to the stack slot.
10485 SDValue Store = DAG.getTruncStore(
10486 Chain, dl, Val, StackPtr,
10487 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10488
10489 EVT StackPtrVT = StackPtr.getValueType();
10490
10491 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10492 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10494 unsigned Offset = 0;
10495
10496 // Do all but one copies using the full register width.
10497 for (unsigned i = 1; i < NumRegs; i++) {
10498 // Load one integer register's worth from the stack slot.
10499 SDValue Load = DAG.getLoad(
10500 RegVT, dl, Store, StackPtr,
10501 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10502 // Store it to the final location. Remember the store.
10503 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10504 ST->getPointerInfo().getWithOffset(Offset),
10505 ST->getBaseAlign(),
10506 ST->getMemOperand()->getFlags()));
10507 // Increment the pointers.
10508 Offset += RegBytes;
10509 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10510 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10511 }
10512
10513 // The last store may be partial. Do a truncating store. On big-endian
10514 // machines this requires an extending load from the stack slot to ensure
10515 // that the bits are in the right place.
10516 EVT LoadMemVT =
10517 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10518
10519 // Load from the stack slot.
10520 SDValue Load = DAG.getExtLoad(
10521 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10522 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10523
10524 Stores.push_back(DAG.getTruncStore(
10525 Load.getValue(1), dl, Load, Ptr,
10526 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10527 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10528 // The order of the stores doesn't matter - say it with a TokenFactor.
10529 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10530 return Result;
10531 }
10532
10533 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10534 "Unaligned store of unknown type.");
10535 // Get the half-size VT
10536 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10537 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10538 unsigned IncrementSize = NumBits / 8;
10539
10540 // Divide the stored value in two parts.
10541 SDValue ShiftAmount =
10542 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10543 SDValue Lo = Val;
10544 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10545 // fold and not use the upper bits. A smaller constant may be easier to
10546 // materialize.
10547 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10548 Lo = DAG.getNode(
10549 ISD::AND, dl, VT, Lo,
10550 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10551 VT));
10552 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10553
10554 // Store the two parts
10555 SDValue Store1, Store2;
10556 Store1 = DAG.getTruncStore(Chain, dl,
10557 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10558 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10559 ST->getMemOperand()->getFlags());
10560
10561 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10562 Store2 = DAG.getTruncStore(
10563 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10564 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10565 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10566
10567 SDValue Result =
10568 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10569 return Result;
10570}
10571
// Returns Addr advanced past one vector memory access: Addr + Increment,
// where Increment is the number of bytes the access consumes. For
// compressed memory the increment is popcount(Mask) * element-size; for
// scalable vectors it is vscale * known-min store size; otherwise it is
// the fixed store size of DataVT.
// NOTE(review): this listing is a doxygen export; lines made only of
// hyperlinked tokens were dropped. The opening signature line is missing
// here — presumably TargetLowering::IncrementMemoryAddress(SDValue Addr,
// SDValue Mask, ...); confirm against the original source.
10572SDValue
10574 const SDLoc &DL, EVT DataVT,
10575 SelectionDAG &DAG,
10576 bool IsCompressedMemory) const {
10577 SDValue Increment;
10578 EVT AddrVT = Addr.getValueType();
10579 EVT MaskVT = Mask.getValueType();
// Data and Mask must describe the same number of vector lanes.
10580 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10581 "Incompatible types of Data and Mask");
10582 if (IsCompressedMemory) {
10583 if (DataVT.isScalableVector())
// NOTE(review): a line was dropped by the extraction here (the call that
// reports the fatal error whose message string follows).
10585 "Cannot currently handle compressed memory with scalable vectors");
10586 // Incrementing the pointer according to number of '1's in the mask.
10587 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10588 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
// Widen sub-i32 masks so CTPOP operates on a sensible integer type.
10589 if (MaskIntVT.getSizeInBits() < 32) {
10590 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10591 MaskIntVT = MVT::i32;
10592 }
10593
10594 // Count '1's with POPCNT.
10595 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10596 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10597 // Scale is an element size in bytes.
10598 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10599 AddrVT);
// Bytes consumed = (number of active lanes) * (bytes per lane).
10600 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10601 } else if (DataVT.isScalableVector()) {
// Scalable vectors advance by vscale * known-min store size.
10602 Increment = DAG.getVScale(DL, AddrVT,
10603 APInt(AddrVT.getFixedSizeInBits(),
10604 DataVT.getStoreSize().getKnownMinValue()));
10605 } else
10606 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10607
10608 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10609}
10610
// Clamps a dynamic index used to address a sub-vector of SubEC elements
// inside a vector of type VecVT, so that Idx + NumSubElts - 1 never reads
// past the end of the vector.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably a file-local
// "static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,";
// confirm against the original source.
10612 EVT VecVT, const SDLoc &dl,
10613 ElementCount SubEC) {
10614 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10615 "Cannot index a scalable vector within a fixed-width vector");
10616
10617 unsigned NElts = VecVT.getVectorMinNumElements();
10618 unsigned NumSubElts = SubEC.getKnownMinValue();
10619 EVT IdxVT = Idx.getValueType();
10620
10621 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10622 // If this is a constant index and we know the value plus the number of the
10623 // elements in the subvector minus one is less than the minimum number of
10624 // elements then it's safe to return Idx.
10625 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10626 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10627 return Idx;
// Runtime clamp: Idx = umin(Idx, vscale*NElts - NumSubElts). USUBSAT is
// used when the subtraction could underflow (NumSubElts > NElts).
10628 SDValue VS =
10629 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10630 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10631 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10632 DAG.getConstant(NumSubElts, dl, IdxVT));
10633 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10634 }
// Single-element extract from a power-of-two-sized vector: a simple mask
// is cheaper than a umin.
10635 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10636 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10637 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10638 DAG.getConstant(Imm, dl, IdxVT));
10639 }
10640 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10641 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10642 DAG.getConstant(MaxIndex, dl, IdxVT));
10643}
10644
// Returns a pointer to a single element (at Index) of the in-memory
// vector at VecPtr.
// NOTE(review): several hyperlinked lines were dropped by the HTML
// extraction here (the signature line and the callee/sub-vector-type
// lines); in upstream LLVM this forwards to getVectorSubVecPointer with
// a one-element sub-vector type — confirm against the original source.
10646 SDValue VecPtr, EVT VecVT,
10647 SDValue Index) const {
10649 DAG, VecPtr, VecVT,
10651 Index);
10652}
10653
// Computes the address of a sub-vector (of type SubVecVT) within the
// in-memory vector of type VecVT at VecPtr: clamps Index to stay in
// bounds, scales it to a byte offset, and adds it to the base pointer.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,";
// confirm against the original source.
10655 SDValue VecPtr, EVT VecVT,
10656 EVT SubVecVT,
10657 SDValue Index) const {
10658 SDLoc dl(Index);
10659 // Make sure the index type is big enough to compute in.
10660 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10661
10662 EVT EltVT = VecVT.getVectorElementType();
10663
10664 // Calculate the element offset and add it to the pointer.
10665 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10666 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10667 "Converting bits to bytes lost precision");
10668 assert(SubVecVT.getVectorElementType() == EltVT &&
10669 "Sub-vector must be a vector with matching element type");
// Keep Index + (sub-vector length) - 1 within the vector's bounds.
10670 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10671 SubVecVT.getVectorElementCount());
10672
10673 EVT IdxVT = Index.getValueType();
// For scalable sub-vectors the element index is itself scaled by vscale.
10674 if (SubVecVT.isScalableVector())
10675 Index =
10676 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10677 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10678
// Convert the (element) index to a byte offset.
10679 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10680 DAG.getConstant(EltSize, dl, IdxVT));
10681 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10682}
10683
10684//===----------------------------------------------------------------------===//
10685// Implementation of Emulated TLS Model
10686//===----------------------------------------------------------------------===//
10687
// Lowers access to a thread-local global under the emulated-TLS model by
// emitting a call to __emutls_get_address on the companion
// "__emutls_v.<name>" control variable.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,";
// confirm against the original source.
10689 SelectionDAG &DAG) const {
10690 // Access to address of TLS variable xyz is lowered to a function call:
10691 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10692 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10693 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10694 SDLoc dl(GA);
10695
10696 ArgListTy Args;
10697 const GlobalValue *GV =
// NOTE(review): the initializer line for GV was dropped by the extraction.
// Build the control-variable name "__emutls_v.<global name>" and look it up
// in the same module; the front end is expected to have created it.
10699 SmallString<32> NameString("__emutls_v.");
10700 NameString += GV->getName();
10701 StringRef EmuTlsVarName(NameString);
10702 const GlobalVariable *EmuTlsVar =
10703 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10704 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10705 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10706
10707 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10708
// NOTE(review): the CallLoweringInfo declaration line was dropped by the
// extraction (CLI is used below).
10710 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10711 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10712 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10713
10714 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10715 // At least for X86 targets, maybe good for other targets too?
10717 MFI.setAdjustsStack(true); // Is this only for X86 target?
10718 MFI.setHasCalls(true);
10719
10720 assert((GA->getOffset() == 0) &&
10721 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10722 return CallResult.first;
10723}
10724
// Lowers (setcc X, 0, eq) to (ctlz X) >> log2(bitwidth) on targets where
// CTLZ is fast: the shift isolates the "X was zero" bit of the leading
// zero count. Returns an empty SDValue when the pattern does not apply.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,";
// confirm against the original source.
10726 SelectionDAG &DAG) const {
10727 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10728 if (!isCtlzFast())
10729 return SDValue();
10730 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10731 SDLoc dl(Op);
10732 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10733 EVT VT = Op.getOperand(0).getValueType();
10734 SDValue Zext = Op.getOperand(0);
// Widen sub-i32 operands so the CTLZ/shift is done in at least i32.
10735 if (VT.bitsLT(MVT::i32)) {
10736 VT = MVT::i32;
10737 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10738 }
// ctlz(X) == bitwidth iff X == 0, so bit log2(bitwidth) of the count is
// set exactly when X is zero.
10739 unsigned Log2b = Log2_32(VT.getSizeInBits());
10740 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10741 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10742 DAG.getConstant(Log2b, dl, MVT::i32));
10743 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10744 }
10745 return SDValue();
10746}
10747
// Expands integer SMIN/SMAX/UMIN/UMAX: first tries cheap special-case
// rewrites (sub/usubsat identities), then falls back to
// setcc + select, preferring a condition code for which a SETCC node
// already exists in the DAG so it can be reused.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {";
// several condition-continuation lines below are also missing. Confirm
// against the original source.
10749 SDValue Op0 = Node->getOperand(0);
10750 SDValue Op1 = Node->getOperand(1);
10751 EVT VT = Op0.getValueType();
10752 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10753 unsigned Opcode = Node->getOpcode();
10754 SDLoc DL(Node);
10755
10756 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10757 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// NOTE(review): condition continuation line dropped by the extraction.
10759 Op0 = DAG.getFreeze(Op0);
10760 SDValue Zero = DAG.getConstant(0, DL, VT);
10761 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10762 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10763 }
10764
10765 // umin(x,y) -> sub(x,usubsat(x,y))
10766 // TODO: Missing freeze(Op0)?
10767 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): condition continuation line dropped by the extraction.
10769 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10770 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10771 }
10772
10773 // umax(x,y) -> add(x,usubsat(y,x))
10774 // TODO: Missing freeze(Op0)?
10775 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
// NOTE(review): condition continuation line dropped by the extraction.
10777 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10778 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10779 }
10780
10781 // FIXME: Should really try to split the vector in case it's legal on a
10782 // subvector.
// NOTE(review): the guarding condition line for this unroll was dropped
// by the extraction.
10784 return DAG.UnrollVectorOp(Node);
10785
10786 // Attempt to find an existing SETCC node that we can reuse.
10787 // TODO: Do we need a generic doesSETCCNodeExist?
10788 // TODO: Missing freeze(Op0)/freeze(Op1)?
10789 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10790 ISD::CondCode PrefCommuteCC,
10791 ISD::CondCode AltCommuteCC) {
10792 SDVTList BoolVTList = DAG.getVTList(BoolVT);
// First look for a reusable compare whose select keeps operand order...
10793 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10794 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10795 {Op0, Op1, DAG.getCondCode(CC)})) {
10796 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10797 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10798 }
10799 }
// ...then for the commuted form, which swaps the select arms.
10800 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10801 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10802 {Op0, Op1, DAG.getCondCode(CC)})) {
10803 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10804 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10805 }
10806 }
// No reusable compare: emit the preferred condition directly.
10807 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10808 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10809 };
10810
10811 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10812 // -> Y = (A < B) ? B : A
10813 // -> Y = (A >= B) ? A : B
10814 // -> Y = (A <= B) ? B : A
10815 switch (Opcode) {
10816 case ISD::SMAX:
10817 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10818 case ISD::SMIN:
10819 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10820 case ISD::UMAX:
10821 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10822 case ISD::UMIN:
10823 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10824 }
10825
10826 llvm_unreachable("How did we get here?");
10827}
10828
// Expands saturating add/sub ([SU]ADDSAT / [SU]SUBSAT). Tries cheap
// umax/umin identities first, then expands via the corresponding
// overflow opcode ([SU]ADDO / [SU]SUBO) plus a select (or bitmask, for
// targets whose boolean is all-ones) of the saturation value.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {";
// several guard/declaration lines below are missing too. Confirm against
// the original source.
10830 unsigned Opcode = Node->getOpcode();
10831 SDValue LHS = Node->getOperand(0);
10832 SDValue RHS = Node->getOperand(1);
10833 EVT VT = LHS.getValueType();
10834 SDLoc dl(Node);
10835
10836 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10837 assert(VT.isInteger() && "Expected operands to be integers");
10838
10839 // usub.sat(a, b) -> umax(a, b) - b
10840 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10841 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10842 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10843 }
10844
10845 // uadd.sat(a, b) -> umin(a, ~b) + b
10846 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10847 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10848 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10849 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10850 }
10851
// Map the saturating opcode onto the matching overflow opcode.
10852 unsigned OverflowOp;
10853 switch (Opcode) {
10854 case ISD::SADDSAT:
10855 OverflowOp = ISD::SADDO;
10856 break;
10857 case ISD::UADDSAT:
10858 OverflowOp = ISD::UADDO;
10859 break;
10860 case ISD::SSUBSAT:
10861 OverflowOp = ISD::SSUBO;
10862 break;
10863 case ISD::USUBSAT:
10864 OverflowOp = ISD::USUBO;
10865 break;
10866 default:
10867 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10868 "addition or subtraction node.");
10869 }
10870
10871 // FIXME: Should really try to split the vector in case it's legal on a
10872 // subvector.
// NOTE(review): the guarding condition line for this unroll was dropped
// by the extraction.
10874 return DAG.UnrollVectorOp(Node);
10875
10876 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10877 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Overflow op yields {result, overflow-flag} as two values.
10878 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10879 SDValue SumDiff = Result.getValue(0);
10880 SDValue Overflow = Result.getValue(1);
10881 SDValue Zero = DAG.getConstant(0, dl, VT);
10882 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10883
10884 if (Opcode == ISD::UADDSAT) {
// NOTE(review): the boolean-contents guard line was dropped by the
// extraction; this branch applies when overflow sign-extends to all-ones.
10886 // (LHS + RHS) | OverflowMask
10887 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10888 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10889 }
10890 // Overflow ? 0xffff.... : (LHS + RHS)
10891 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10892 }
10893
10894 if (Opcode == ISD::USUBSAT) {
// NOTE(review): the boolean-contents guard line was dropped by the
// extraction; this branch applies when overflow sign-extends to all-ones.
10896 // (LHS - RHS) & ~OverflowMask
10897 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10898 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10899 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10900 }
10901 // Overflow ? 0 : (LHS - RHS)
10902 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10903 }
10904
10905 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
// NOTE(review): lines declaring the signed MinVal/MaxVal APInt constants
// used below were dropped by the extraction.
10908
10909 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10910 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10911
10912 // If either of the operand signs are known, then they are guaranteed to
10913 // only saturate in one direction. If non-negative they will saturate
10914 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10915 //
10916 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10917 // sign of 'y' has to be flipped.
10918
10919 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10920 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10921 : KnownRHS.isNegative();
10922 if (LHSIsNonNegative || RHSIsNonNegative) {
10923 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10924 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10925 }
10926
10927 bool LHSIsNegative = KnownLHS.isNegative();
10928 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10929 : KnownRHS.isNonNegative();
10930 if (LHSIsNegative || RHSIsNegative) {
10931 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10932 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10933 }
10934 }
10935
10936 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// The arithmetic shift of the sign bit produces 0 or all-ones; XOR with
// MinVal turns that into SIGNED_MAX or SIGNED_MIN respectively.
// NOTE(review): the MinVal declaration line for this path was dropped by
// the extraction.
10938 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10939 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10940 DAG.getConstant(BitWidth - 1, dl, VT));
10941 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10942 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10943}
10944
// Expands the three-way compare nodes (UCMP/SCMP): returns -1/0/+1 in
// ResVT according to LHS <=> RHS, either via two selects or via
// sext(IsGT) - sext(IsLT) arithmetic on the boolean type.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {";
// confirm against the original source.
10946 unsigned Opcode = Node->getOpcode();
10947 SDValue LHS = Node->getOperand(0);
10948 SDValue RHS = Node->getOperand(1);
10949 EVT VT = LHS.getValueType();
10950 EVT ResVT = Node->getValueType(0);
10951 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10952 SDLoc dl(Node);
10953
// UCMP compares unsigned, SCMP signed.
10954 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10955 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10956 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10957 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10958
10959 // We can't perform arithmetic on i1 values. Extending them would
10960 // probably result in worse codegen, so let's just use two selects instead.
10961 // Some targets are also just better off using selects rather than subtraction
10962 // because one of the conditions can be merged with one of the selects.
10963 // And finally, if we don't know the contents of high bits of a boolean value
10964 // we can't perform any arithmetic either.
10965 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
// NOTE(review): condition continuation line dropped by the extraction.
10967 SDValue SelectZeroOrOne =
10968 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10969 DAG.getConstant(0, dl, ResVT));
10970 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10971 SelectZeroOrOne);
10972 }
10973
// NOTE(review): the guard line before this swap was dropped by the
// extraction; it selects the operand order for the boolean subtraction.
10975 std::swap(IsGT, IsLT);
10976 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10977 ResVT);
10978}
10979
// Expands saturating shift-left (SSHLSAT/USHLSAT): computes LHS << RHS,
// then shifts back and compares with LHS to detect overflow; on overflow
// selects the appropriate saturation constant.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {";
// confirm against the original source.
10981 unsigned Opcode = Node->getOpcode();
10982 bool IsSigned = Opcode == ISD::SSHLSAT;
10983 SDValue LHS = Node->getOperand(0);
10984 SDValue RHS = Node->getOperand(1);
10985 EVT VT = LHS.getValueType();
10986 SDLoc dl(Node);
10987
10988 assert((Node->getOpcode() == ISD::SSHLSAT ||
10989 Node->getOpcode() == ISD::USHLSAT) &&
10990 "Expected a SHLSAT opcode");
10991 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10992 assert(VT.isInteger() && "Expected operands to be integers");
10993
// NOTE(review): the guarding condition line for this unroll was dropped
// by the extraction.
10995 return DAG.UnrollVectorOp(Node);
10996
10997 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10998
10999 unsigned BW = VT.getScalarSizeInBits();
11000 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11001 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Round-trip shift: arithmetic for signed, logical for unsigned.
11002 SDValue Orig =
11003 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11004
11005 SDValue SatVal;
11006 if (IsSigned) {
// Signed saturation direction follows the sign of LHS.
11007 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11008 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11009 SDValue Cond =
11010 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11011 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11012 } else {
11013 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11014 }
11015 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11016 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11017}
11018
// Brute-force expansion of a double-width multiply into half-width
// partial products (Knuth's Algorithm M, via Hacker's Delight), producing
// the low half in Lo and the high half in Hi. When HiLHS/HiRHS are given,
// their cross-products with the opposite low parts are folded into Hi.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,";
// confirm against the original source.
11020 bool Signed, SDValue &Lo, SDValue &Hi,
11021 SDValue LHS, SDValue RHS,
11022 SDValue HiLHS, SDValue HiRHS) const {
11023 EVT VT = LHS.getValueType();
11024 assert(RHS.getValueType() == VT && "Mismatching operand types");
11025
// HiLHS/HiRHS must be provided together, and only for the unsigned form.
11026 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11027 assert((!Signed || !HiLHS) &&
11028 "Signed flag should only be set when HiLHS and RiHS are null");
11029
11030 // We'll expand the multiplication by brute force because we have no other
11031 // options. This is a trivially-generalized version of the code from
11032 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11033 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11034 // sign bits while calculating the Hi half.
11035 unsigned Bits = VT.getSizeInBits();
11036 unsigned HalfBits = Bits / 2;
11037 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
// LL/RL are the low halves of the two operands.
11038 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11039 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11040
// T = LL*RL is the first partial product; TL/TH are its halves.
11041 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11042 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11043
11044 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11045 // This is always an unsigned shift.
11046 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11047
// High halves: arithmetic shift keeps the sign for the signed case.
11048 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11049 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11050 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11051
// Accumulate the cross partial products with carry propagation.
11052 SDValue U =
11053 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11054 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11055 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11056
11057 SDValue V =
11058 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11059 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11060
11061 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11062 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11063
11064 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11065 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11066
11067 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11068 // the products to Hi.
11069 if (HiLHS) {
11070 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11071 DAG.getNode(ISD::ADD, dl, VT,
11072 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11073 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11074 }
11075}
11076
// Expands a full (double-width) multiply of LHS*RHS into Lo/Hi halves.
// Prefers a MUL_I16/I32/I64/I128 libcall when one exists for the doubled
// width; otherwise falls back to forceExpandMultiply. For the libcall,
// the signed high inputs are formed by sign-replication (SRA by width-1)
// and operand order follows the target's endianness.
// NOTE(review): the opening signature line was dropped by the HTML
// extraction — presumably
// "void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,";
// confirm against the original source.
11078 bool Signed, const SDValue LHS,
11079 const SDValue RHS, SDValue &Lo,
11080 SDValue &Hi) const {
11081 EVT VT = LHS.getValueType();
11082 assert(RHS.getValueType() == VT && "Mismatching operand types");
11083 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11084 // We can fall back to a libcall with an illegal type for the MUL if we
11085 // have a libcall big enough.
11086 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11087 if (WideVT == MVT::i16)
11088 LC = RTLIB::MUL_I16;
11089 else if (WideVT == MVT::i32)
11090 LC = RTLIB::MUL_I32;
11091 else if (WideVT == MVT::i64)
11092 LC = RTLIB::MUL_I64;
11093 else if (WideVT == MVT::i128)
11094 LC = RTLIB::MUL_I128;
11095
// No suitable libcall: expand with shift-and-add partial products.
11096 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
11097 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11098 return;
11099 }
11100
11101 SDValue HiLHS, HiRHS;
11102 if (Signed) {
11103 // The high part is obtained by SRA'ing all but one of the bits of low
11104 // part.
11105 unsigned LoSize = VT.getFixedSizeInBits();
11106 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11107 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11108 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11109 } else {
11110 HiLHS = DAG.getConstant(0, dl, VT);
11111 HiRHS = DAG.getConstant(0, dl, VT);
11112 }
11113
11114 // Attempt a libcall.
11115 SDValue Ret;
// NOTE(review): the MakeLibCallOptions declaration line was dropped by
// the extraction (CallOptions is used below).
11117 CallOptions.setIsSigned(Signed);
11118 CallOptions.setIsPostTypeLegalization(true);
// NOTE(review): the endianness guard line for this branch was dropped by
// the extraction.
11120 // Halves of WideVT are packed into registers in different order
11121 // depending on platform endianness. This is usually handled by
11122 // the C calling convention, but we can't defer to it in
11123 // the legalizer.
11124 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11125 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11126 } else {
11127 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11128 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11129 }
11130 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11131 "Ret value is a collection of constituent nodes holding result.");
// Unpack the two result halves in endianness order.
11132 if (DAG.getDataLayout().isLittleEndian()) {
11133 // Same as above.
11134 Lo = Ret.getOperand(0);
11135 Hi = Ret.getOperand(1);
11136 } else {
11137 Lo = Ret.getOperand(1);
11138 Hi = Ret.getOperand(0);
11139 }
11140}
11141
11142SDValue
11144 assert((Node->getOpcode() == ISD::SMULFIX ||
11145 Node->getOpcode() == ISD::UMULFIX ||
11146 Node->getOpcode() == ISD::SMULFIXSAT ||
11147 Node->getOpcode() == ISD::UMULFIXSAT) &&
11148 "Expected a fixed point multiplication opcode");
11149
11150 SDLoc dl(Node);
11151 SDValue LHS = Node->getOperand(0);
11152 SDValue RHS = Node->getOperand(1);
11153 EVT VT = LHS.getValueType();
11154 unsigned Scale = Node->getConstantOperandVal(2);
11155 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11156 Node->getOpcode() == ISD::UMULFIXSAT);
11157 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11158 Node->getOpcode() == ISD::SMULFIXSAT);
11159 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11160 unsigned VTSize = VT.getScalarSizeInBits();
11161
11162 if (!Scale) {
11163 // [us]mul.fix(a, b, 0) -> mul(a, b)
11164 if (!Saturating) {
11166 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11167 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11168 SDValue Result =
11169 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11170 SDValue Product = Result.getValue(0);
11171 SDValue Overflow = Result.getValue(1);
11172 SDValue Zero = DAG.getConstant(0, dl, VT);
11173
11174 APInt MinVal = APInt::getSignedMinValue(VTSize);
11175 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11176 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11177 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11178 // Xor the inputs, if resulting sign bit is 0 the product will be
11179 // positive, else negative.
11180 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11181 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11182 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11183 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11184 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11185 SDValue Result =
11186 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11187 SDValue Product = Result.getValue(0);
11188 SDValue Overflow = Result.getValue(1);
11189
11190 APInt MaxVal = APInt::getMaxValue(VTSize);
11191 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11192 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11193 }
11194 }
11195
11196 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11197 "Expected scale to be less than the number of bits if signed or at "
11198 "most the number of bits if unsigned.");
11199 assert(LHS.getValueType() == RHS.getValueType() &&
11200 "Expected both operands to be the same type");
11201
11202 // Get the upper and lower bits of the result.
11203 SDValue Lo, Hi;
11204 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11205 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11206 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11207 if (VT.isVector())
11208 WideVT =
11210 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11211 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11212 Lo = Result.getValue(0);
11213 Hi = Result.getValue(1);
11214 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11215 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11216 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11217 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11218 // Try for a multiplication using a wider type.
11219 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11220 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11221 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11222 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11223 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11224 SDValue Shifted =
11225 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11226 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11227 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11228 } else if (VT.isVector()) {
11229 return SDValue();
11230 } else {
11231 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11232 }
11233
11234 if (Scale == VTSize)
11235 // Result is just the top half since we'd be shifting by the width of the
11236 // operand. Overflow impossible so this works for both UMULFIX and
11237 // UMULFIXSAT.
11238 return Hi;
11239
11240 // The result will need to be shifted right by the scale since both operands
11241 // are scaled. The result is given to us in 2 halves, so we only want part of
11242 // both in the result.
11243 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11244 DAG.getShiftAmountConstant(Scale, VT, dl));
11245 if (!Saturating)
11246 return Result;
11247
11248 if (!Signed) {
11249 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11250 // widened multiplication) aren't all zeroes.
11251
11252 // Saturate to max if ((Hi >> Scale) != 0),
11253 // which is the same as if (Hi > ((1 << Scale) - 1))
11254 APInt MaxVal = APInt::getMaxValue(VTSize);
11255 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11256 dl, VT);
11257 Result = DAG.getSelectCC(dl, Hi, LowMask,
11258 DAG.getConstant(MaxVal, dl, VT), Result,
11259 ISD::SETUGT);
11260
11261 return Result;
11262 }
11263
11264 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11265 // widened multiplication) aren't all ones or all zeroes.
11266
11267 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11268 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11269
11270 if (Scale == 0) {
11271 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11272 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11273 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11274 // Saturated to SatMin if wide product is negative, and SatMax if wide
11275 // product is positive ...
11276 SDValue Zero = DAG.getConstant(0, dl, VT);
11277 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11278 ISD::SETLT);
11279 // ... but only if we overflowed.
11280 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11281 }
11282
11283 // We handled Scale==0 above so all the bits to examine is in Hi.
11284
11285 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11286 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11287 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11288 dl, VT);
11289 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11290 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11291 // which is the same as if (HI < (-1 << (Scale - 1))
11292 SDValue HighMask =
11293 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11294 dl, VT);
11295 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11296 return Result;
11297}
11298
// Expand a fixed-point division node (ISD::[SU]DIVFIX[SAT]) into a plain
// integer division of the same width, by pre-shifting the operands when the
// operands' known headroom (redundant sign bits / leading zeros on the LHS,
// trailing zeros on the RHS) covers Scale. Returns an empty SDValue when the
// division cannot be performed in this type (caller must widen instead).
// NOTE(review): this listing lost several hyperlinked lines during
// extraction (the declarator at 11300, the unsigned arm of the ?: at 11318,
// and the SDIVREM legality condition at 11356); confirm exact text against
// the upstream LLVM sources.
11299SDValue
11301 SDValue LHS, SDValue RHS,
11302 unsigned Scale, SelectionDAG &DAG) const {
11303 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11304 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11305 "Expected a fixed point division opcode");
11306
11307 EVT VT = LHS.getValueType();
11308 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11309 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11310 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11311
11312 // If there is enough room in the type to upscale the LHS or downscale the
11313 // RHS before the division, we can perform it in this type without having to
11314 // resize. For signed operations, the LHS headroom is the number of
11315 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11316 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned branch of this ?: (counting min leading zeros
// of LHS) was dropped by extraction at file line 11318.
11317 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11319 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11320
11321 // For signed saturating operations, we need to be able to detect true integer
11322 // division overflow; that is, when you have MIN / -EPS. However, this
11323 // is undefined behavior and if we emit divisions that could take such
11324 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11325 // example).
11326 // Avoid this by requiring an extra bit so that we never get this case.
11327 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11328 // signed saturating division, we need to emit a whopping 32-bit division.
11329 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11330 return SDValue();
11331
// Prefer shifting the LHS up; only shift the RHS down for whatever part of
// Scale the LHS headroom cannot absorb.
11332 unsigned LHSShift = std::min(LHSLead, Scale);
11333 unsigned RHSShift = Scale - LHSShift;
11334
11335 // At this point, we know that if we shift the LHS up by LHSShift and the
11336 // RHS down by RHSShift, we can emit a regular division with a final scaling
11337 // factor of Scale.
11338
11339 if (LHSShift)
11340 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11341 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11342 if (RHSShift)
11343 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11344 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11345
11346 SDValue Quot;
11347 if (Signed) {
11348 // For signed operations, if the resulting quotient is negative and the
11349 // remainder is nonzero, subtract 1 from the quotient to round towards
11350 // negative infinity.
11351 SDValue Rem;
11352 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11353 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11354 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the second half of this condition (SDIVREM legality check,
// file line 11356) was dropped by extraction.
11355 if (isTypeLegal(VT) &&
11357 Quot = DAG.getNode(ISD::SDIVREM, dl,
11358 DAG.getVTList(VT, VT),
11359 LHS, RHS);
11360 Rem = Quot.getValue(1);
11361 Quot = Quot.getValue(0);
11362 } else {
11363 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11364 LHS, RHS);
11365 Rem = DAG.getNode(ISD::SREM, dl, VT,
11366 LHS, RHS);
11367 }
// Quotient is negative exactly when the operand signs differ; round toward
// negative infinity by subtracting one when there is a nonzero remainder.
11368 SDValue Zero = DAG.getConstant(0, dl, VT);
11369 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11370 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11371 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11372 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11373 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11374 DAG.getConstant(1, dl, VT));
11375 Quot = DAG.getSelect(dl, VT,
11376 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11377 Sub1, Quot);
11378 } else
11379 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11380 LHS, RHS);
11381
11382 return Quot;
11383}
11384
// Expand ISD::UADDO / ISD::USUBO into an ADD/SUB plus an explicit unsigned
// overflow check, preferring UADDO_CARRY/USUBO_CARRY with a zero carry-in
// when the target supports it. Writes the arithmetic value to Result and the
// boolean overflow flag (in the node's second result type) to Overflow.
// NOTE(review): the declarator line (file line 11385) was dropped by
// extraction from this listing.
11386 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11387 SDLoc dl(Node);
11388 SDValue LHS = Node->getOperand(0);
11389 SDValue RHS = Node->getOperand(1);
11390 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11391
11392 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11393 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11394 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11395 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11396 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11397 { LHS, RHS, CarryIn });
11398 Result = SDValue(NodeCarry.getNode(), 0);
11399 Overflow = SDValue(NodeCarry.getNode(), 1);
11400 return;
11401 }
11402
11403 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11404 LHS.getValueType(), LHS, RHS);
11405
11406 EVT ResultType = Node->getValueType(1);
11407 EVT SetCCType = getSetCCResultType(
11408 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11409 SDValue SetCC;
11410 if (IsAdd && isOneConstant(RHS)) {
11411 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11412 // the live range of X. We assume comparing with 0 is cheap.
11413 // The general case (X + C) < C is not necessarily beneficial. Although we
11414 // reduce the live range of X, we may introduce the materialization of
11415 // constant C.
11416 SetCC =
11417 DAG.getSetCC(dl, SetCCType, Result,
11418 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11419 } else if (IsAdd && isAllOnesConstant(RHS)) {
11420 // Special case: uaddo X, -1 overflows if X != 0.
11421 SetCC =
11422 DAG.getSetCC(dl, SetCCType, LHS,
11423 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11424 } else {
// General case: unsigned add overflowed iff Result < LHS; unsigned sub
// underflowed iff Result > LHS.
11425 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11426 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11427 }
11428 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11429}
11430
// Expand ISD::SADDO / ISD::SSUBO into an ADD/SUB plus an explicit signed
// overflow check. If the matching saturating opcode (SADDSAT/SSUBSAT) is
// legal, overflow is detected by comparing the wrapped and saturated results;
// otherwise a sign-based comparison trick is used. Writes the arithmetic
// value to Result and the boolean overflow flag to Overflow.
// NOTE(review): the declarator line (file line 11431) was dropped by
// extraction from this listing.
11432 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11433 SDLoc dl(Node);
11434 SDValue LHS = Node->getOperand(0);
11435 SDValue RHS = Node->getOperand(1);
11436 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11437
11438 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11439 LHS.getValueType(), LHS, RHS);
11440
11441 EVT ResultType = Node->getValueType(1);
11442 EVT OType = getSetCCResultType(
11443 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11444
11445 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11446 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11447 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11448 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11449 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11450 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11451 return;
11452 }
11453
11454 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11455
11456 // For an addition, the result should be less than one of the operands (LHS)
11457 // if and only if the other operand (RHS) is negative, otherwise there will
11458 // be overflow.
11459 // For a subtraction, the result should be less than one of the operands
11460 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11461 // otherwise there will be overflow.
11462 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11463 SDValue ConditionRHS =
11464 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11465
// Overflow happened exactly when the two conditions disagree (XOR).
11466 Overflow = DAG.getBoolExtOrTrunc(
11467 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11468 ResultType, ResultType);
11469}
11470
// Expand ISD::SMULO / ISD::UMULO: compute the low half of the product into
// Result and set Overflow when the full product does not fit the type.
// Strategy, in order of preference: shift expansion for power-of-two
// constants; MULH[SU]; [SU]MUL_LOHI; multiply in a double-width type; and
// finally forceExpandWideMUL. Returns false only for vectors with no usable
// expansion.
// NOTE(review): extraction dropped the declarator (file line 11471), the
// EVT::getVectorVT call for the widened vector type (11500-11501), and the
// Ops[2][3] initializer rows (11505-11506); confirm against upstream.
11472 SDValue &Overflow, SelectionDAG &DAG) const {
11473 SDLoc dl(Node);
11474 EVT VT = Node->getValueType(0);
11475 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11476 SDValue LHS = Node->getOperand(0);
11477 SDValue RHS = Node->getOperand(1);
11478 bool isSigned = Node->getOpcode() == ISD::SMULO;
11479
11480 // For power-of-two multiplications we can use a simpler shift expansion.
11481 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11482 const APInt &C = RHSC->getAPIntValue();
11483 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11484 if (C.isPowerOf2()) {
11485 // smulo(x, signed_min) is same as umulo(x, signed_min).
11486 bool UseArithShift = isSigned && !C.isMinSignedValue();
11487 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11488 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11489 Overflow = DAG.getSetCC(dl, SetCCVT,
11490 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11491 dl, VT, Result, ShiftAmt),
11492 LHS, ISD::SETNE);
11493 return true;
11494 }
11495 }
11496
11497 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11498 if (VT.isVector())
11499 WideVT =
11501
11502 SDValue BottomHalf;
11503 SDValue TopHalf;
// Ops rows select {MULH, MUL_LOHI, EXTEND} opcodes per signedness; the
// initializer text was lost in extraction (see NOTE above).
11504 static const unsigned Ops[2][3] =
11507 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11508 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11509 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11510 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11511 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11512 RHS);
11513 TopHalf = BottomHalf.getValue(1);
11514 } else if (isTypeLegal(WideVT)) {
11515 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11516 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11517 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11518 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11519 SDValue ShiftAmt =
11520 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11521 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11522 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11523 } else {
11524 if (VT.isVector())
11525 return false;
11526
11527 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11528 }
11529
11530 Result = BottomHalf;
11531 if (isSigned) {
// Signed: overflow unless the top half is the sign-extension of the bottom.
11532 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11533 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11534 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11535 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11536 } else {
// Unsigned: overflow iff any bit of the top half is set.
11537 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11538 DAG.getConstant(0, dl, VT), ISD::SETNE);
11539 }
11540
11541 // Truncate the result if SetCC returns a larger type than needed.
11542 EVT RType = Node->getValueType(1);
11543 if (RType.bitsLT(Overflow.getValueType()))
11544 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11545
11546 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11547 "Unexpected result type for S/UMULO legalization");
11548 return true;
11549}
11550
// Expand a VECREDUCE_* node: repeatedly split power-of-two vectors in half
// and combine halves with the reduction's base (binary) opcode while that
// remains legal, then scalarize whatever is left into a linear chain of
// scalar ops. Scalable vectors cannot be expanded here and abort.
// NOTE(review): extraction dropped the declarator (file line 11551), the
// loop header around the pow2-splitting body (11559 — the lone closing brace
// at 11574 closes it), the report_fatal_error callee line (11577), and the
// SmallVector declaration whose text was at 11583.
11552 SDLoc dl(Node);
11553 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11554 SDValue Op = Node->getOperand(0);
11555 EVT VT = Op.getValueType();
11556
11557 // Try to use a shuffle reduction for power of two vectors.
11558 if (VT.isPow2VectorType()) {
11560 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11561 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11562 break;
11563
11564 SDValue Lo, Hi;
11565 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11566 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11567 VT = HalfVT;
11568
11569 // Stop if splitting is enough to make the reduction legal.
11570 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11571 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11572 Node->getFlags());
11573 }
11574 }
11575
11576 if (VT.isScalableVector())
11578 "Expanding reductions for scalable vectors is undefined.");
11579
11580 EVT EltVT = VT.getVectorElementType();
11581 unsigned NumElts = VT.getVectorNumElements();
11583
11584 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11585
// Fold the extracted scalars left-to-right with the base opcode.
11586 SDValue Res = Ops[0];
11587 for (unsigned i = 1; i < NumElts; i++)
11588 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11589
11590 // Result type may be wider than element type.
11591 if (EltVT != Node->getValueType(0))
11592 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11593 return Res;
11594}
11595
// Expand a sequential (ordered) VECREDUCE_SEQ_* node by scalarizing: starting
// from the accumulator operand, fold each vector element in order with the
// reduction's base opcode, preserving the node's fast-math flags. Aborts for
// scalable vectors, which cannot be fully unrolled.
// NOTE(review): extraction dropped the declarator (file line 11596), the
// report_fatal_error callee line (11606), and the SmallVector declaration
// (11611) from this listing.
11597 SDLoc dl(Node);
11598 SDValue AccOp = Node->getOperand(0);
11599 SDValue VecOp = Node->getOperand(1);
11600 SDNodeFlags Flags = Node->getFlags();
11601
11602 EVT VT = VecOp.getValueType();
11603 EVT EltVT = VT.getVectorElementType();
11604
11605 if (VT.isScalableVector())
11607 "Expanding reductions for scalable vectors is undefined.");
11608
11609 unsigned NumElts = VT.getVectorNumElements();
11610
11612 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11613
11614 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11615
// Sequential semantics: the accumulator seeds the chain and elements are
// combined strictly in ascending index order.
11616 SDValue Res = AccOp;
11617 for (unsigned i = 0; i < NumElts; i++)
11618 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11619
11620 return Res;
11621}
11622
// Expand ISD::SREM / ISD::UREM. Prefers the combined [SU]DIVREM node when
// legal or custom; otherwise falls back to the identity X % Y == X - (X/Y)*Y
// using a plain division. Writes the remainder to Result and returns true on
// success; returns false when neither form is available.
// NOTE(review): the declarator line (file line 11623) was dropped by
// extraction from this listing.
11624 SelectionDAG &DAG) const {
11625 EVT VT = Node->getValueType(0);
11626 SDLoc dl(Node);
11627 bool isSigned = Node->getOpcode() == ISD::SREM;
11628 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11629 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11630 SDValue Dividend = Node->getOperand(0);
11631 SDValue Divisor = Node->getOperand(1);
11632 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
// DIVREM produces {quotient, remainder}; we only need result #1.
11633 SDVTList VTs = DAG.getVTList(VT, VT);
11634 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11635 return true;
11636 }
11637 if (isOperationLegalOrCustom(DivOpc, VT)) {
11638 // X % Y -> X-X/Y*Y
11639 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11640 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11641 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11642 return true;
11643 }
11644 return false;
11645}
11646
// Expand ISD::FP_TO_SINT_SAT / FP_TO_UINT_SAT: convert a floating-point
// value to an integer, clamping to the [min, max] range of a SatWidth-bit
// integer and mapping NaN to zero. Uses FMINNUM/FMAXNUM clamping when the
// exact integer bounds are representable in the source FP type and those ops
// are legal; otherwise emits compare+select fixups around a plain FP_TO_XINT.
// NOTE(review): the declarator line (file line 11647) was dropped by
// extraction from this listing.
11648 SelectionDAG &DAG) const {
11649 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11650 SDLoc dl(SDValue(Node, 0));
11651 SDValue Src = Node->getOperand(0);
11652
11653 // DstVT is the result type, while SatVT is the size to which we saturate
11654 EVT SrcVT = Src.getValueType();
11655 EVT DstVT = Node->getValueType(0);
11656
11657 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11658 unsigned SatWidth = SatVT.getScalarSizeInBits();
11659 unsigned DstWidth = DstVT.getScalarSizeInBits();
11660 assert(SatWidth <= DstWidth &&
11661 "Expected saturation width smaller than result width");
11662
11663 // Determine minimum and maximum integer values and their corresponding
11664 // floating-point values.
11665 APInt MinInt, MaxInt;
11666 if (IsSigned) {
11667 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11668 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11669 } else {
11670 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11671 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11672 }
11673
11674 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11675 // libcall emission cannot handle this. Large result types will fail.
11676 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11677 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11678 SrcVT = Src.getValueType();
11679 }
11680
// Round the integer bounds toward zero into SrcVT's FP format and record
// whether both conversions were exact (opInexact not raised).
11681 const fltSemantics &Sem = SrcVT.getFltSemantics();
11682 APFloat MinFloat(Sem);
11683 APFloat MaxFloat(Sem);
11684
11685 APFloat::opStatus MinStatus =
11686 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11687 APFloat::opStatus MaxStatus =
11688 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11689 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11690 !(MaxStatus & APFloat::opStatus::opInexact);
11691
11692 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11693 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11694
11695 // If the integer bounds are exactly representable as floats and min/max are
11696 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11697 // of comparisons and selects.
11698 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11699 isOperationLegal(ISD::FMAXNUM, SrcVT);
11700 if (AreExactFloatBounds && MinMaxLegal) {
11701 SDValue Clamped = Src;
11702
11703 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11704 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11705 // Clamp by MaxFloat from above. NaN cannot occur.
11706 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11707 // Convert clamped value to integer.
11708 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11709 dl, DstVT, Clamped);
11710
11711 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11712 // which will cast to zero.
11713 if (!IsSigned)
11714 return FpToInt;
11715
11716 // Otherwise, select 0 if Src is NaN.
11717 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11718 EVT SetCCVT =
11719 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11720 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11721 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11722 }
11723
11724 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11725 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11726
11727 // Result of direct conversion. The assumption here is that the operation is
11728 // non-trapping and it's fine to apply it to an out-of-range value if we
11729 // select it away later.
11730 SDValue FpToInt =
11731 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11732
11733 SDValue Select = FpToInt;
11734
11735 EVT SetCCVT =
11736 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11737
11738 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11739 // MinInt if Src is NaN.
11740 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11741 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11742 // If Src OGT MaxFloat, select MaxInt.
11743 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11744 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11745
11746 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11747 // is already zero.
11748 if (!IsSigned)
11749 return Select;
11750
11751 // Otherwise, select 0 if Src is NaN.
11752 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11753 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11754 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11755}
11756
// Round Op to ResultVT using round-to-odd: the narrow result is kept as-is
// when the narrowing was exact, the wide value was NaN, or the narrow value
// is already odd; otherwise its integer representation is nudged by +/-1
// toward the odd neighbour. This makes a later second rounding step safe
// (see the Boldo/Melquiond citation below). Returns Op unchanged when no
// narrowing is needed.
// NOTE(review): the declarator line (file line 11757) was dropped by
// extraction from this listing.
11758 const SDLoc &dl,
11759 SelectionDAG &DAG) const {
11760 EVT OperandVT = Op.getValueType();
11761 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11762 return Op;
11763 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11764 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11765 // can induce double-rounding which may alter the results. We can
11766 // correct for this using a trick explained in: Boldo, Sylvie, and
11767 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11768 // World Congress. 2005.
11769 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11770 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11771
11772 // We can keep the narrow value as-is if narrowing was exact (no
11773 // rounding error), the wide value was NaN (the narrow value is also
11774 // NaN and should be preserved) or if we rounded to the odd value.
11775 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11776 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11777 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11778 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11779 EVT ResultIntVTCCVT = getSetCCResultType(
11780 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11781 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11782 // The result is already odd so we don't need to do anything.
11783 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11784
11785 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11786 Op.getValueType());
11787 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal-or-unordered, so it covers both "exact" and NaN.
11788 SDValue KeepNarrow =
11789 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11790 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11791 // We morally performed a round-down if AbsNarrow is smaller than
11792 // AbsWide.
11793 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11794 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11795 SDValue NarrowIsRd =
11796 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11797 // If the narrow value is odd or exact, pick it.
11798 // Otherwise, narrow is even and corresponds to either the rounded-up
11799 // or rounded-down value. If narrow is the rounded-down value, we want
11800 // the rounded-up value as it will be odd.
11801 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11802 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11803 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11804 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11805}
11806
// Expand ISD::FP_ROUND when the destination scalar type is bf16: route
// through f32 with round-to-odd (expandRoundInexactToOdd) to avoid
// double-rounding, then round-to-nearest-even to bf16 via integer bit
// manipulation, quieting NaNs along the way. A trunc flag of 1 (operand 1)
// lowers directly to FP_TO_BF16. Returns SDValue() for non-bf16 targets so
// the caller falls back to default expansion.
// NOTE(review): the declarator line (file line 11807) was dropped by
// extraction from this listing.
11808 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11809 SDValue Op = Node->getOperand(0);
11810 EVT VT = Node->getValueType(0);
11811 SDLoc dl(Node);
11812 if (VT.getScalarType() == MVT::bf16) {
11813 if (Node->getConstantOperandVal(1) == 1) {
11814 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11815 }
11816 EVT OperandVT = Op.getValueType();
11817 SDValue IsNaN = DAG.getSetCC(
11818 dl,
11819 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11820 Op, Op, ISD::SETUO);
11821
11822 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11823 // can induce double-rounding which may alter the results. We can
11824 // correct for this using a trick explained in: Boldo, Sylvie, and
11825 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11826 // World Congress. 2005.
11827 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11828 EVT I32 = F32.changeTypeToInteger();
11829 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11830 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11831
11832 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11833 // turning into infinities.
11834 SDValue NaN =
11835 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11836
11837 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the LSB of the result's
// keep-bit, so exact ties round toward the even bf16 value.
11838 SDValue One = DAG.getConstant(1, dl, I32);
11839 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11840 DAG.getShiftAmountConstant(16, I32, dl));
11841 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11842 SDValue RoundingBias =
11843 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11844 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11845
11846 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11847 // 0x80000000.
11848 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11849
11850 // Now that we have rounded, shift the bits into position.
11851 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11852 DAG.getShiftAmountConstant(16, I32, dl));
11853 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11854 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11855 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11856 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11857 }
11858 return SDValue();
11859}
11860
// Expand ISD::VECTOR_SPLICE for scalable vectors by spilling V1:V2 to a
// stack temporary and reloading the spliced window: for a non-negative
// immediate the load starts Imm elements into V1; for a negative immediate
// it starts TrailingElts (= -Imm, clamped to the runtime vector length)
// elements before the end of V1. Fixed-length splices are expected to have
// been lowered to SHUFFLE_VECTOR instead (asserted below).
// NOTE(review): extraction dropped the declarator (11861), the EVT MemVT =
// EVT::getVectorVT(...) call head (11886), and several MachinePointerInfo /
// element-count argument lines (11899, 11909, 11923, 11932); confirm the
// exact text against the upstream LLVM sources.
11862 SelectionDAG &DAG) const {
11863 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11864 assert(Node->getValueType(0).isScalableVector() &&
11865 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11866
11867 EVT VT = Node->getValueType(0);
11868 SDValue V1 = Node->getOperand(0);
11869 SDValue V2 = Node->getOperand(1);
11870 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11871 SDLoc DL(Node);
11872
11873 // Expand through memory thusly:
11874 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11875 // Store V1, Ptr
11876 // Store V2, Ptr + sizeof(V1)
11877 // If (Imm < 0)
11878 // TrailingElts = -Imm
11879 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11880 // else
11881 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11882 // Res = Load Ptr
11883
11884 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11885
11887 VT.getVectorElementCount() * 2);
11888 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11889 EVT PtrVT = StackPtr.getValueType();
11890 auto &MF = DAG.getMachineFunction();
11891 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11892 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11893
11894 // Store the lo part of CONCAT_VECTORS(V1, V2)
11895 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11896 // Store the hi part of CONCAT_VECTORS(V1, V2)
// OffsetToV2 is a vscale-scaled byte offset, i.e. the runtime store size
// of V1.
11897 SDValue OffsetToV2 = DAG.getVScale(
11898 DL, PtrVT,
11900 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11901 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11902
11903 if (Imm >= 0) {
11904 // Load back the required element. getVectorElementPointer takes care of
11905 // clamping the index if it's out-of-bounds.
11906 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11907 // Load the spliced result
11908 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11910 }
11911
11912 uint64_t TrailingElts = -Imm;
11913
11914 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11915 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11916 SDValue TrailingBytes =
11917 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11918
// Clamp to the runtime vector length when the static element count could
// exceed it (only possible for scalable types).
11919 if (TrailingElts > VT.getVectorMinNumElements()) {
11920 SDValue VLBytes =
11921 DAG.getVScale(DL, PtrVT,
11922 APInt(PtrVT.getFixedSizeInBits(),
11924 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11925 }
11926
11927 // Calculate the start address of the spliced result.
11928 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11929
11930 // Load the spliced result
11931 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11933}
11934
// Expand a vector compress node through the stack: optionally pre-store the
// passthru vector, then walk the source vector element by element, storing
// each lane and advancing the output position only when its (frozen,
// truncated) mask bit is set, and finally reload the whole vector. For a
// non-splat passthru the element at position popcount(mask) is captured
// up-front and used to repair the last conditional store. Scalable vectors
// are rejected with a fatal error.
// NOTE(review): extraction dropped the declarator (11935) and several
// MachinePointerInfo / extend-opcode argument lines (11955, 11987, 11994,
// 12004, 12029); confirm exact text against the upstream LLVM sources.
11936 SelectionDAG &DAG) const {
11937 SDLoc DL(Node);
11938 SDValue Vec = Node->getOperand(0);
11939 SDValue Mask = Node->getOperand(1);
11940 SDValue Passthru = Node->getOperand(2);
11941
11942 EVT VecVT = Vec.getValueType();
11943 EVT ScalarVT = VecVT.getScalarType();
11944 EVT MaskVT = Mask.getValueType();
11945 EVT MaskScalarVT = MaskVT.getScalarType();
11946
11947 // Needs to be handled by targets that have scalable vector types.
11948 if (VecVT.isScalableVector())
11949 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11950
11951 SDValue StackPtr = DAG.CreateStackTemporary(
11952 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11953 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11954 MachinePointerInfo PtrInfo =
11956
11957 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11958 SDValue Chain = DAG.getEntryNode();
11959 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11960
11961 bool HasPassthru = !Passthru.isUndef();
11962
11963 // If we have a passthru vector, store it on the stack, overwrite the matching
11964 // positions and then re-write the last element that was potentially
11965 // overwritten even though mask[i] = false.
11966 if (HasPassthru)
11967 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11968
11969 SDValue LastWriteVal;
11970 APInt PassthruSplatVal;
11971 bool IsSplatPassthru =
11972 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11973
11974 if (IsSplatPassthru) {
11975 // As we do not know which position we wrote to last, we cannot simply
11976 // access that index from the passthru vector. So we first check if passthru
11977 // is a splat vector, to use any element ...
11978 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11979 } else if (HasPassthru) {
11980 // ... if it is not a splat vector, we need to get the passthru value at
11981 // position = popcount(mask) and re-load it from the stack before it is
11982 // overwritten in the loop below.
11983 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11984 SDValue Popcount = DAG.getNode(
11985 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11986 Popcount =
11988 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11989 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11990 SDValue LastElmtPtr =
11991 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11992 LastWriteVal = DAG.getLoad(
11993 ScalarVT, DL, Chain, LastElmtPtr,
11995 Chain = LastWriteVal.getValue(1);
11996 }
11997
11998 unsigned NumElms = VecVT.getVectorNumElements();
11999 for (unsigned I = 0; I < NumElms; I++) {
12000 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12001 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12002 Chain = DAG.getStore(
12003 Chain, DL, ValI, OutPtr,
12005
12006 // Get the mask value and add it to the current output position. This
12007 // either increments by 1 if MaskI is true or adds 0 otherwise.
12008 // Freeze in case we have poison/undef mask entries.
12009 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12010 MaskI = DAG.getFreeze(MaskI);
12011 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12012 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12013 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12014
12015 if (HasPassthru && I == NumElms - 1) {
12016 SDValue EndOfVector =
12017 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12018 SDValue AllLanesSelected =
12019 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12020 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12021 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12022
12023 // Re-write the last ValI if all lanes were selected. Otherwise,
12024 // overwrite the last write it with the passthru value.
12025 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12026 LastWriteVal, SDNodeFlags::Unpredictable);
12027 Chain = DAG.getStore(
12028 Chain, DL, LastWriteVal, OutPtr,
12030
12031 }
12032
12033 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12034}
12035
// Expand a PARTIAL_REDUCE_*MLA node: extend the multiplicands to the
// accumulator's element type (sign- or zero-extend chosen per the node's
// opcode), multiply them (skipping the multiply when the RHS is a splat of
// one), then split the wide product into accumulator-sized subvectors and
// sum them together with the accumulator via a queue-based tree of ADDs.
// NOTE(review): extraction dropped the declarator (12036), the
// EVT::getVectorVT call head (12046), and the ?:-selected extend opcode
// operand lines (12050-12051, 12053-12054); confirm exact text against the
// upstream LLVM sources.
12037 SelectionDAG &DAG) const {
12038 SDLoc DL(N);
12039 SDValue Acc = N->getOperand(0);
12040 SDValue MulLHS = N->getOperand(1);
12041 SDValue MulRHS = N->getOperand(2);
12042 EVT AccVT = Acc.getValueType();
12043 EVT MulOpVT = MulLHS.getValueType();
12044
12045 EVT ExtMulOpVT =
12047 MulOpVT.getVectorElementCount());
12048
12049 unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
12052 unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
12055
12056 if (ExtMulOpVT != MulOpVT) {
12057 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12058 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12059 }
// A splat-of-one RHS makes the multiply a no-op; use the LHS directly.
12060 SDValue Input = MulLHS;
12061 APInt ConstantOne;
12062 if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
12063 !ConstantOne.isOne())
12064 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12065
12066 unsigned Stride = AccVT.getVectorMinNumElements();
12067 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12068
12069 // Collect all of the subvectors
12070 std::deque<SDValue> Subvectors = {Acc};
12071 for (unsigned I = 0; I < ScaleFactor; I++)
12072 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12073
12074 // Flatten the subvector tree
// Pairwise-add the two front entries and push the sum to the back until a
// single vector remains.
12075 while (Subvectors.size() > 1) {
12076 Subvectors.push_back(
12077 DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12078 Subvectors.pop_front();
12079 Subvectors.pop_front();
12080 }
12081
12082 assert(Subvectors.size() == 1 &&
12083 "There should only be one subvector after tree flattening");
12084
12085 return Subvectors[0];
12086}
12087
12089 SDValue &LHS, SDValue &RHS,
12090 SDValue &CC, SDValue Mask,
12091 SDValue EVL, bool &NeedInvert,
12092 const SDLoc &dl, SDValue &Chain,
12093 bool IsSignaling) const {
// Legalize a setcc whose condition code is not legal for OpVT, rewriting
// LHS/RHS/CC in place. Handles both regular setcc (EVL/Mask unset) and the
// VP form (both set). Returns true when the comparison was rewritten; sets
// NeedInvert when the caller must invert the produced result.
//
// NOTE(review): generated listing — several lines were dropped by extraction
// (12088 signature start, 12102/12105-12106 case labels and the initial
// InvCC declaration, 12170 CC1/CC2 declarations, 12182/12187 assert
// conditions); confirm against upstream TargetLowering.cpp.
12094 MVT OpVT = LHS.getSimpleValueType();
12095 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12096 NeedInvert = false;
12097 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12098 bool IsNonVP = !EVL;
12099 switch (getCondCodeAction(CCCode, OpVT)) {
12100 default:
12101 llvm_unreachable("Unknown condition code action!");
12103 // Nothing to do.
12104 break;
// First attempt: swap the operands if the swapped condition is legal.
12107 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12108 std::swap(LHS, RHS);
12109 CC = DAG.getCondCode(InvCC);
12110 return true;
12111 }
12112 // Swapping operands didn't work. Try inverting the condition.
12113 bool NeedSwap = false;
12114 InvCC = getSetCCInverse(CCCode, OpVT);
12115 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12116 // If inverting the condition is not enough, try swapping operands
12117 // on top of it.
12118 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12119 NeedSwap = true;
12120 }
12121 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12122 CC = DAG.getCondCode(InvCC);
12123 NeedInvert = true;
12124 if (NeedSwap)
12125 std::swap(LHS, RHS);
12126 return true;
12127 }
12128
12129 // Special case: expand i1 comparisons using logical operations.
12130 if (OpVT == MVT::i1) {
12131 SDValue Ret;
12132 switch (CCCode) {
12133 default:
12134 llvm_unreachable("Unknown integer setcc!");
12135 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12136 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12137 MVT::i1);
12138 break;
12139 case ISD::SETNE: // X != Y --> (X ^ Y)
12140 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12141 break;
12142 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12143 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12144 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12145 DAG.getNOT(dl, LHS, MVT::i1));
12146 break;
12147 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12148 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12149 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12150 DAG.getNOT(dl, RHS, MVT::i1));
12151 break;
12152 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12153 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12154 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12155 DAG.getNOT(dl, LHS, MVT::i1));
12156 break;
12157 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12158 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12159 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12160 DAG.getNOT(dl, RHS, MVT::i1));
12161 break;
12162 }
12163
12164 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12165 RHS = SDValue();
12166 CC = SDValue();
12167 return true;
12168 }
12169
// General floating-point expansion: split the comparison into two setccs
// (CC1 and CC2) whose results are combined with Opc (AND/OR).
12171 unsigned Opc = 0;
12172 switch (CCCode) {
12173 default:
12174 llvm_unreachable("Don't know how to expand this condition!");
12175 case ISD::SETUO:
12176 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
// unord(X, Y) --> (X != X) | (Y != Y) via SETUNE on each operand.
12177 CC1 = ISD::SETUNE;
12178 CC2 = ISD::SETUNE;
12179 Opc = ISD::OR;
12180 break;
12181 }
12183 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12184 NeedInvert = true;
12185 [[fallthrough]];
12186 case ISD::SETO:
12188 "If SETO is expanded, SETOEQ must be legal!");
// ord(X, Y) --> (X == X) & (Y == Y).
12189 CC1 = ISD::SETOEQ;
12190 CC2 = ISD::SETOEQ;
12191 Opc = ISD::AND;
12192 break;
12193 case ISD::SETONE:
12194 case ISD::SETUEQ:
12195 // If the SETUO or SETO CC isn't legal, we might be able to use
12196 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12197 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12198 // the operands.
12199 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12200 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12201 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12202 CC1 = ISD::SETOGT;
12203 CC2 = ISD::SETOLT;
12204 Opc = ISD::OR;
12205 NeedInvert = ((unsigned)CCCode & 0x8U);
12206 break;
12207 }
12208 [[fallthrough]];
12209 case ISD::SETOEQ:
12210 case ISD::SETOGT:
12211 case ISD::SETOGE:
12212 case ISD::SETOLT:
12213 case ISD::SETOLE:
12214 case ISD::SETUNE:
12215 case ISD::SETUGT:
12216 case ISD::SETUGE:
12217 case ISD::SETULT:
12218 case ISD::SETULE:
12219 // If we are floating point, assign and break, otherwise fall through.
12220 if (!OpVT.isInteger()) {
12221 // We can use the 4th bit to tell if we are the unordered
12222 // or ordered version of the opcode.
12223 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12224 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12225 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12226 break;
12227 }
12228 // Fallthrough if we are unsigned integer.
12229 [[fallthrough]];
12230 case ISD::SETLE:
12231 case ISD::SETGT:
12232 case ISD::SETGE:
12233 case ISD::SETLT:
12234 case ISD::SETNE:
12235 case ISD::SETEQ:
12236 // If all combinations of inverting the condition and swapping operands
12237 // didn't work then we have no means to expand the condition.
12238 llvm_unreachable("Don't know how to expand this condition!");
12239 }
12240
12241 SDValue SetCC1, SetCC2;
12242 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12243 // If we aren't the ordered or unorder operation,
12244 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12245 if (IsNonVP) {
12246 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12247 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12248 } else {
12249 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12250 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12251 }
12252 } else {
12253 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12254 if (IsNonVP) {
12255 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12256 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12257 } else {
12258 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12259 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12260 }
12261 }
// For strict (chained) comparisons, merge the two output chains so neither
// setcc can be reordered relative to the original.
12262 if (Chain)
12263 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12264 SetCC2.getValue(1));
12265 if (IsNonVP)
12266 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12267 else {
12268 // Transform the binary opcode to the VP equivalent.
12269 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12270 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12271 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12272 }
12273 RHS = SDValue();
12274 CC = SDValue();
12275 return true;
12276 }
12277 }
// The condition code was already legal (or nothing applied): no rewrite.
12278 return false;
12279 }
12280
12282 SelectionDAG &DAG) const {
// Expand an n-ary vector operation by splitting every operand into two
// equal halves, performing the operation on each half, and concatenating
// the results. Returns SDValue() (no expansion) unless the type splits
// into two equal, legal halves and the opcode is supported on the half
// type.
//
// NOTE(review): generated listing — the first signature line (12281) was
// dropped by extraction; confirm against upstream TargetLowering.cpp.
12283 EVT VT = Node->getValueType(0);
12284 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12285 // split into two equal parts.
12286 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12287 return SDValue();
12288
12289 // Restrict expansion to cases where both parts can be concatenated.
12290 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12291 if (LoVT != HiVT || !isTypeLegal(LoVT))
12292 return SDValue();
12293
12294 SDLoc DL(Node);
12295 unsigned Opcode = Node->getOpcode();
12296
12297 // Don't expand if the result is likely to be unrolled anyway.
12298 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12299 return SDValue();
12300
// Split every operand into its low and high halves.
12301 SmallVector<SDValue, 4> LoOps, HiOps;
12302 for (const SDValue &V : Node->op_values()) {
12303 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12304 LoOps.push_back(Lo);
12305 HiOps.push_back(Hi);
12306 }
12307
// Perform the operation per half and rejoin with CONCAT_VECTORS.
12308 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12309 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12310 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12311 }
12312
12314 const SDLoc &DL,
12315 EVT InVecVT, SDValue EltNo,
12316 LoadSDNode *OriginalLoad,
12317 SelectionDAG &DAG) const {
// Replace "extract element EltNo from a loaded vector" with a single
// scalar load of just that element. Returns SDValue() when the narrow
// load cannot be formed (element not byte-sized, scalar loads of the
// element type unsupported, target rejects the reduced width, or the
// narrow access would be slow/unaligned).
//
// NOTE(review): generated listing — lines 12313 (signature start), 12319,
// 12335 (MPI declaration) and 12368 were dropped by extraction; confirm
// against upstream TargetLowering.cpp.
12318 assert(OriginalLoad->isSimple());
12319
12320 EVT VecEltVT = InVecVT.getVectorElementType();
12321
12322 // If the vector element type is not a multiple of a byte then we are unable
12323 // to correctly compute an address to load only the extracted element as a
12324 // scalar.
12325 if (!VecEltVT.isByteSized())
12326 return SDValue();
12327
12328 ISD::LoadExtType ExtTy =
12329 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12330 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12331 return SDValue();
12332
// With a constant index the exact byte offset (and thus the refined
// pointer info and alignment) is known; otherwise only the address space
// survives and alignment is conservatively that of one element.
12333 std::optional<unsigned> ByteOffset;
12334 Align Alignment = OriginalLoad->getAlign();
12336 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12337 int Elt = ConstEltNo->getZExtValue();
12338 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12339 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12340 Alignment = commonAlignment(Alignment, *ByteOffset);
12341 } else {
12342 // Discard the pointer info except the address space because the memory
12343 // operand can't represent this new access since the offset is variable.
12344 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12345 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12346 }
12347
12348 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12349 return SDValue();
12350
12351 unsigned IsFast = 0;
12352 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12353 OriginalLoad->getAddressSpace(), Alignment,
12354 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12355 !IsFast)
12356 return SDValue();
12357
12358 SDValue NewPtr =
12359 getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12360
12361 // We are replacing a vector load with a scalar load. The new load must have
12362 // identical memory op ordering to the original.
12363 SDValue Load;
12364 if (ResultVT.bitsGT(VecEltVT)) {
12365 // If the result type of vextract is wider than the load, then issue an
12366 // extending load instead.
12367 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12369 : ISD::EXTLOAD;
12370 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12371 NewPtr, MPI, VecEltVT, Alignment,
12372 OriginalLoad->getMemOperand()->getFlags(),
12373 OriginalLoad->getAAInfo());
12374 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12375 } else {
12376 // The result type is narrower or the same width as the vector element
12377 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12378 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12379 OriginalLoad->getAAInfo());
12380 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12381 if (ResultVT.bitsLT(VecEltVT))
12382 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12383 else
12384 Load = DAG.getBitcast(ResultVT, Load);
12385 }
12386
12387 return Load;
12388 }
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1158
APInt bitcastToAPInt() const
Definition APFloat.h:1353
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1138
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1098
void changeSign()
Definition APFloat.h:1297
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
void negate()
Negate this APInt in place.
Definition APInt.h:1468
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1435
unsigned logBase2() const
Definition APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
void setAllBits()
Set every bit to 1.
Definition APInt.h:1319
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1417
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1442
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:581
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
iterator end() const
Definition StringRef.h:122
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
TargetLoweringBase(const TargetMachine &TM)
NOTE: The TargetMachine owns TLOF.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op; at this point, we know that only the DemandedBits bits of the result of Op are demanded, and attempt to simplify Op accordingly.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:705
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3009
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:400
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:406
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2060
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:295
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1765
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1569
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:378
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:425
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:376
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:294
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:179
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:248
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
KnownBits byteSwap() const
Definition KnownBits.h:507
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:282
KnownBits reverseBits() const
Definition KnownBits.h:511
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:226
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:314
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:60
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:98
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:160
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:279
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...