23#include "llvm/IR/IntrinsicsDirectX.h"
32#define DEBUG_TYPE "dxil-intrinsic-expansion"
47 if (IsRaw && M->getTargetTriple().getDXILVersion() >
VersionTuple(1, 2))
55 switch (
F.getIntrinsicID()) {
57 case Intrinsic::atan2:
59 case Intrinsic::is_fpclass:
61 case Intrinsic::log10:
64 case Intrinsic::dx_all:
65 case Intrinsic::dx_any:
66 case Intrinsic::dx_cross:
67 case Intrinsic::dx_uclamp:
68 case Intrinsic::dx_sclamp:
69 case Intrinsic::dx_nclamp:
70 case Intrinsic::dx_degrees:
71 case Intrinsic::dx_lerp:
72 case Intrinsic::dx_normalize:
73 case Intrinsic::dx_fdot:
74 case Intrinsic::dx_sdot:
75 case Intrinsic::dx_udot:
76 case Intrinsic::dx_sign:
77 case Intrinsic::dx_step:
78 case Intrinsic::dx_radians:
79 case Intrinsic::usub_sat:
80 case Intrinsic::vector_reduce_add:
81 case Intrinsic::vector_reduce_fadd:
83 case Intrinsic::dx_resource_load_rawbuffer:
85 F.getParent(),
F.getReturnType()->getStructElementType(0),
87 case Intrinsic::dx_resource_load_typedbuffer:
89 F.getParent(),
F.getReturnType()->getStructElementType(0),
91 case Intrinsic::dx_resource_store_rawbuffer:
93 F.getParent(),
F.getFunctionType()->getParamType(3),
true);
94 case Intrinsic::dx_resource_store_typedbuffer:
96 F.getParent(),
F.getFunctionType()->getParamType(2),
false);
104 Type *Ty =
A->getType();
110 Value *Zero = ConstantInt::get(Ty, 0);
115 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
116 IntrinsicId == Intrinsic::vector_reduce_fadd);
119 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
122 Type *Ty =
X->getType();
123 auto *XVec = dyn_cast<FixedVectorType>(Ty);
124 unsigned XVecSize = XVec->getNumElements();
135 for (
unsigned I = 1;
I < XVecSize;
I++) {
149 Type *Ty =
X->getType();
155 ConstantInt::get(EltTy, 0))
156 : ConstantInt::get(EltTy, 0);
186 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
187 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
188 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
201 Type *ATy =
A->getType();
202 [[maybe_unused]]
Type *BTy =
B->getType();
207 auto *AVec = dyn_cast<FixedVectorType>(ATy);
212 int NumElts = AVec->getNumElements();
215 DotIntrinsic = Intrinsic::dx_dot2;
218 DotIntrinsic = Intrinsic::dx_dot3;
221 DotIntrinsic = Intrinsic::dx_dot4;
225 "Invalid dot product input vector: length is outside 2-4");
230 for (
int I = 0;
I < NumElts; ++
I)
232 for (
int I = 0;
I < NumElts; ++
I)
249 assert(DotIntrinsic == Intrinsic::dx_sdot ||
250 DotIntrinsic == Intrinsic::dx_udot);
253 Type *ATy =
A->getType();
254 [[maybe_unused]]
Type *BTy =
B->getType();
259 auto *AVec = dyn_cast<FixedVectorType>(ATy);
264 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
266 : Intrinsic::dx_umad;
270 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
283 Type *Ty =
X->getType();
293 Builder.
CreateIntrinsic(Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
301 auto *TCI = dyn_cast<ConstantInt>(
T);
305 switch (TCI->getZExtValue()) {
306 case FPClassTest::fcInf:
307 case FPClassTest::fcNan:
308 case FPClassTest::fcNormal:
309 case FPClassTest::fcFinite:
316 Type *FTy =
F->getType();
317 unsigned FNumElem = 0;
322 if (
auto *FVecTy = dyn_cast<FixedVectorType>(FTy)) {
323 Type *ElemTy = FVecTy->getElementType();
324 FNumElem = FVecTy->getNumElements();
333 switch (TCI->getZExtValue()) {
334 case FPClassTest::fcNegZero: {
341 Builder.
CreateICmpEQ(FBitCast, NegZeroSplat,
"is.fpclass.negzero");
343 RetVal = Builder.
CreateICmpEQ(FBitCast, NegZero,
"is.fpclass.negzero");
355 Type *Ty =
X->getType();
360 if (IntrinsicId == Intrinsic::dx_any)
361 return Builder.
CreateOr(Result, Elt);
362 assert(IntrinsicId == Intrinsic::dx_all);
366 Value *Result =
nullptr;
372 auto *XVec = dyn_cast<FixedVectorType>(Ty);
378 ConstantFP::get(EltTy, 0)))
382 ConstantInt::get(EltTy, 0)));
384 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
386 Result = ApplyOp(IntrinsicId, Result, Elt);
406 Type *Ty =
X->getType();
412 ConstantFP::get(EltTy, LogConstVal))
413 : ConstantFP::get(EltTy, LogConstVal);
418 return Builder.
CreateFMul(Ln2Const, Log2Call);
432 auto *XVec = dyn_cast<FixedVectorType>(Ty);
434 if (
auto *constantFP = dyn_cast<ConstantFP>(
X)) {
435 const APFloat &fpVal = constantFP->getValueAPF();
446 if (
auto *constantFP = dyn_cast<ConstantFP>(DotProduct)) {
447 const APFloat &fpVal = constantFP->getValueAPF();
454 nullptr,
"dx.rsqrt");
456 Value *MultiplicandVec =
464 Type *Ty =
X->getType();
471 Builder.
CreateIntrinsic(Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
479 Constant *Zero = ConstantFP::get(Ty, 0);
484 Value *Result = Atan;
492 Result = Builder.
CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
496 Result = Builder.
CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
500 Result = Builder.
CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
504 Result = Builder.
CreateSelect(XEq0AndYGe0, HalfPi, Result);
513 Type *Ty =
X->getType();
516 if (IntrinsicId == Intrinsic::powi)
533 Type *Ty =
X->getType();
541 auto *XVec = dyn_cast<FixedVectorType>(Ty);
553 Type *Ty =
X->getType();
566 "Only expand double or int64 scalars or vectors");
567 bool IsVector =
false;
568 unsigned ExtractNum = 2;
569 if (
auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
570 ExtractNum = 2 * VT->getNumElements();
572 assert(IsRaw || ExtractNum == 4 &&
"TypedBufferLoad vector must be size 2");
581 while (ExtractNum > 0) {
582 unsigned LoadNum = std::min(ExtractNum, 4u);
586 Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
589 LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
601 for (
unsigned I = 0;
I < LoadNum; ++
I)
606 for (
unsigned I = 0;
I < LoadNum;
I += 2) {
607 Value *Combined =
nullptr;
612 {ExtractElements[I], ExtractElements[I + 1]});
633 ExtractNum -= LoadNum;
637 Value *CheckBit =
nullptr;
641 auto *EVI = dyn_cast<ExtractValueInst>(U);
648 if (Indices[0] == 0) {
650 EVI->replaceAllUsesWith(Result);
653 assert(Indices[0] == 1 &&
"Unexpected type for typedbufferload");
658 for (
Value *L : Loads)
664 EVI->eraseFromParent();
673 unsigned ValIndex = IsRaw ? 3 : 2;
678 "Only expand double or int64 scalars or vectors");
681 bool IsVector =
false;
682 unsigned ExtractNum = 2;
684 if (
auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
685 VecLen = VT->getNumElements();
686 assert(IsRaw || VecLen == 2 &&
"TypedBufferStore vector must be size 2");
687 ExtractNum = VecLen * 2;
693 Type *ResultTy = VectorType::get(Int32Ty, ExtractNum,
false);
696 Type *SplitElementTy = Int32Ty;
698 SplitElementTy = VectorType::get(SplitElementTy, VecLen,
false);
700 Value *LowBits =
nullptr;
701 Value *HighBits =
nullptr;
718 LowBits = Builder.
CreateTrunc(InputVal, SplitElementTy);
720 HighBits = Builder.
CreateTrunc(ShiftedVal, SplitElementTy);
725 for (
unsigned I = 0;
I < VecLen; ++
I) {
727 Mask.push_back(
I + VecLen);
739 while (ExtractNum > 0) {
740 unsigned StoreNum = std::min(ExtractNum, 4u);
742 Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
745 StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
751 for (
unsigned I = 0;
I < StoreNum; ++
I) {
752 Mask.push_back(
Base +
I);
759 Args.push_back(SubVal);
763 ExtractNum -= StoreNum;
771 if (ClampIntrinsic == Intrinsic::dx_uclamp)
772 return Intrinsic::umax;
773 if (ClampIntrinsic == Intrinsic::dx_sclamp)
774 return Intrinsic::smax;
775 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
776 return Intrinsic::maxnum;
780 if (ClampIntrinsic == Intrinsic::dx_uclamp)
781 return Intrinsic::umin;
782 if (ClampIntrinsic == Intrinsic::dx_sclamp)
783 return Intrinsic::smin;
784 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
785 return Intrinsic::minnum;
793 Type *Ty =
X->getType();
796 {
X, Min},
nullptr,
"dx.max");
798 {MaxCall, Max},
nullptr,
"dx.min");
803 Type *Ty =
X->getType();
811 Type *Ty =
X->getType();
832 return Builder.
CreateSub(ZextGT, ZextLT);
836 Value *Result =
nullptr;
838 switch (IntrinsicId) {
842 case Intrinsic::atan2:
848 case Intrinsic::is_fpclass:
854 case Intrinsic::log10:
858 case Intrinsic::powi:
861 case Intrinsic::dx_all:
862 case Intrinsic::dx_any:
865 case Intrinsic::dx_cross:
868 case Intrinsic::dx_uclamp:
869 case Intrinsic::dx_sclamp:
870 case Intrinsic::dx_nclamp:
873 case Intrinsic::dx_degrees:
876 case Intrinsic::dx_lerp:
879 case Intrinsic::dx_normalize:
882 case Intrinsic::dx_fdot:
885 case Intrinsic::dx_sdot:
886 case Intrinsic::dx_udot:
889 case Intrinsic::dx_sign:
892 case Intrinsic::dx_step:
895 case Intrinsic::dx_radians:
898 case Intrinsic::dx_resource_load_rawbuffer:
902 case Intrinsic::dx_resource_store_rawbuffer:
906 case Intrinsic::dx_resource_load_typedbuffer:
910 case Intrinsic::dx_resource_store_typedbuffer:
914 case Intrinsic::usub_sat:
917 case Intrinsic::vector_reduce_add:
918 case Intrinsic::vector_reduce_fadd:
934 bool IntrinsicExpanded =
false;
936 auto *IntrinsicCall = dyn_cast<CallInst>(U);
941 if (
F.user_empty() && IntrinsicExpanded)
961 "DXIL Intrinsic Expansion",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static bool expansionIntrinsics(Module &M)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandUsubSat(CallInst *Orig)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandIsFPClass(CallInst *Orig)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isZeroValue() const
Return true if the value is negative zero or null value.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Type * getParamType(unsigned i) const
Parameter type accessors.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Value * CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Type * getVoidTy()
Fetch the type representing void.
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM_ABI Type * getStructElementType(unsigned N) const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Represents a version number in the form major[.minor[.subminor[.build]]].
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
@ Sub
Subtraction of integers.
constexpr unsigned BitWidth
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.