LLVM 22.0.0git
NVPTXISelLowering.h
Go to the documentation of this file.
1//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that NVPTX uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16
17#include "NVPTX.h"
21
22namespace llvm {
23namespace NVPTXISD {
24enum NodeType : unsigned {
25 // Start the numbering from where ISD NodeType finishes.
28
29 /// These nodes represent a parameter declaration. In PTX this will look like:
30 /// .param .align 16 .b8 param0[1024];
31 /// .param .b32 retval0;
32 ///
33 /// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
34 /// DeclareScalarParam(Chain, Externalsym, Size, Glue)
37
38 /// This node represents a PTX call instruction. Its operands are as follows:
39 ///
40 /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
41 /// NumParams, Callee, Proto)
43
55
56 /// This node is similar to ISD::BUILD_VECTOR except that the output may be
57 /// implicitly bitcast to a scalar. This allows for the representation of
58 /// packing move instructions for vector types which are not legal i.e. v2i32
60
61 /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
62 /// which may be a scalar and unpacks it into multiple values by implicitly
63 /// converting it to a vector.
65
71
82
84
85 /// These nodes are used to lower atomic instructions with i128 type. They are
86 /// similar to the generic nodes, but the input and output values are split
87 /// into two 64-bit values.
88 /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
89 /// swapLo, swapHi)
90 /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
93
97 LDUV2, // LDU.v2
98 LDUV4, // LDU.v4
103};
104}
105
106class NVPTXSubtarget;
107
108//===--------------------------------------------------------------------===//
109// TargetLowering Implementation
110//===--------------------------------------------------------------------===//
112public:
113 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
114 const NVPTXSubtarget &STI);
115 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
116
117 const char *getTargetNodeName(unsigned Opcode) const override;
118
120 MachineFunction &MF,
121 unsigned Intrinsic) const override;
122
123 Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx,
124 const DataLayout &DL) const;
125
126 /// getFunctionParamOptimizedAlign - since function arguments are passed via
127 /// .param space, we may want to increase their alignment in a way that
128 /// ensures that we can effectively vectorize their loads & stores. We can
129 /// increase alignment only if the function has internal or has private
130 /// linkage as for other linkage types callers may already rely on default
131 /// alignment. To allow using 128-bit vectorized loads/stores, this function
132 /// ensures that alignment is 16 or greater.
134 const DataLayout &DL) const;
135
136 /// Helper for computing alignment of a device function byval parameter.
138 Align InitialAlign,
139 const DataLayout &DL) const;
140
141 // Helper for getting a function parameter name. Name is composed from
142 // its index and the function name. Negative index corresponds to special
143 // parameter (unsized array) used for passing variable arguments.
144 std::string getParamName(const Function *F, int Idx) const;
145
146 /// isLegalAddressingMode - Return true if the addressing mode represented
147 /// by AM is legal for this target, for a load/store of the specified type
148 /// Used to guide target specific optimizations, like loop strength
149 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
150 /// address mode (CodeGenPrepare.cpp)
151 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
152 unsigned AS,
153 Instruction *I = nullptr) const override;
154
155 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
156 // Truncating 64-bit to 32-bit is free in SASS.
157 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
158 return false;
159 return SrcTy->getPrimitiveSizeInBits() == 64 &&
160 DstTy->getPrimitiveSizeInBits() == 32;
161 }
162
164 EVT VT) const override {
165 if (VT.isVector())
166 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
167 return MVT::i1;
168 }
169
170 ConstraintType getConstraintType(StringRef Constraint) const override;
171 std::pair<unsigned, const TargetRegisterClass *>
173 StringRef Constraint, MVT VT) const override;
174
176 bool isVarArg,
178 const SDLoc &dl, SelectionDAG &DAG,
179 SmallVectorImpl<SDValue> &InVals) const override;
180
181 SDValue LowerCall(CallLoweringInfo &CLI,
182 SmallVectorImpl<SDValue> &InVals) const override;
183
187
188 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
190 std::optional<unsigned> FirstVAArg,
191 const CallBase &CB, unsigned UniqueCallSite) const;
192
193 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
195 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
196 SelectionDAG &DAG) const override;
197
199 std::vector<SDValue> &Ops,
200 SelectionDAG &DAG) const override;
201
203
204 // PTX always uses 32-bit shift amounts
205 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
206 return MVT::i32;
207 }
208
210 getPreferredVectorAction(MVT VT) const override;
211
212 // Get the degree of precision we want from 32-bit floating point division
213 // operations.
215 const SDNode &N) const;
216
217 // Get whether we should use a precise or approximate 32-bit floating point
218 // sqrt instruction.
219 bool usePrecSqrtF32(const SDNode *N = nullptr) const;
220
221 // Get whether we should use instructions that flush floating-point denormals
222 // to sign-preserving zero.
223 bool useF32FTZ(const MachineFunction &MF) const;
224
226 int &ExtraSteps, bool &UseOneConst,
227 bool Reciprocal) const override;
228
229 unsigned combineRepeatedFPDivisors() const override { return 2; }
230
231 bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
232
234 EVT) const override {
235 return true;
236 }
237
238 // The default is the same as pointer type, but brx.idx only accepts i32
239 MVT getJumpTableRegTy(const DataLayout &) const override { return MVT::i32; }
240
241 unsigned getJumpTableEncoding() const override;
242
243 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
244
245 // The default is to transform llvm.ctlz(x, false) (where false indicates that
246 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
247 // and avoids calling ctlz in that case. We have a dedicated ctlz
248 // instruction, so we say that ctlz is cheap to speculate.
249 bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
250
253 }
254
257 }
258
260 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
261
262 bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
263 // There's rarely any point of packing something into a vector type if we
264 // already have the source data.
265 return true;
266 }
267
268 bool shouldInsertFencesForAtomic(const Instruction *) const override;
269
272
274 AtomicOrdering Ord) const override;
276 AtomicOrdering Ord) const override;
277
278 unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
279 EVT ToVT) const override;
280
282 const APInt &DemandedElts,
283 const SelectionDAG &DAG,
284 unsigned Depth = 0) const override;
286 const APInt &DemandedElts,
287 KnownBits &Known,
288 TargetLoweringOpt &TLO,
289 unsigned Depth = 0) const override;
290
291private:
292 const NVPTXSubtarget &STI; // cache the subtarget here
293 mutable unsigned GlobalUniqueCallSite;
294
295 SDValue getParamSymbol(SelectionDAG &DAG, int I, EVT T) const;
296 SDValue getCallParamSymbol(SelectionDAG &DAG, int I, EVT T) const;
297 SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
298 SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
299
300 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
301 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
302 SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
303 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
304 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
305 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
306
307 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
308
309 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
310 SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
311 SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
312
313 SDValue PromoteBinOpIfF32FTZ(SDValue Op, SelectionDAG &DAG) const;
314
315 SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
316 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
317
318 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
319 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
320
321 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
322 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
323 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
324
325 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
326 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
327
328 SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
329
330 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
331 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
332
333 SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const;
334 unsigned getNumRegisters(LLVMContext &Context, EVT VT,
335 std::optional<MVT> RegisterVT) const override;
336 bool
337 splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
338 SDValue *Parts, unsigned NumParts, MVT PartVT,
339 std::optional<CallingConv::ID> CC) const override;
340
341 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
342 SelectionDAG &DAG) const override;
343 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
344
345 Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx,
346 const DataLayout &DL) const;
347};
348
349} // namespace llvm
350
351#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
This class represents a function call, abstracting a target machine's calling convention.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
Machine Value Type.
AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const override
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
const NVPTXTargetMachine * nvTM
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl< ISD::OutputArg > &, std::optional< unsigned > FirstVAArg, const CallBase &CB, unsigned UniqueCallSite) const
MVT getJumpTableRegTy(const DataLayout &) const override
unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, EVT ToVT) const override
bool useF32FTZ(const MachineFunction &MF) const
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const
unsigned combineRepeatedFPDivisors() const override
Indicate whether this target prefers to combine FDIVs with the same divisor.
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL) const
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
std::string getParamName(const Function *F, int Idx) const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
NVPTX::DivPrecisionLevel getDivF32Level(const MachineFunction &MF, const SDNode &N) const
bool shouldInsertFencesForAtomic(const Instruction *) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL) const
getFunctionParamOptimizedAlign - since function arguments are passed via .param space,...
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL) const
Helper for computing alignment of a device function byval parameter.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32(const SDNode *N=nullptr) const
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
An instruction for storing to memory.
Definition: Instructions.h:296
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED
@ CALL
This node represents a PTX call instruction.
@ CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X
@ UNPACK_VECTOR
This node is the inverse of NVPTX::BUILD_VECTOR.
@ CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y
@ DeclareScalarParam
These nodes represent a parameter declaration.
@ CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z
@ ATOMIC_CMP_SWAP_B128
These nodes are used to lower atomic instructions with i128 type.
@ BUILD_VECTOR
This node is similar to ISD::BUILD_VECTOR except that the output may be implicitly bitcast to a scala...
DivPrecisionLevel
Definition: NVPTX.h:251
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
AtomicOrdering
Atomic ordering for LLVM's memory model.
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...