LLVM 22.0.0git
NVPTXSubtarget.cpp
Go to the documentation of this file.
1//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the NVPTX specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTXSubtarget.h"
15#include "NVPTXTargetMachine.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "nvptx-subtarget"
22
23#define GET_SUBTARGETINFO_ENUM
24#define GET_SUBTARGETINFO_TARGET_DESC
25#define GET_SUBTARGETINFO_CTOR
26#include "NVPTXGenSubtargetInfo.inc"
27
28static cl::opt<bool>
29 NoF16Math("nvptx-no-f16-math", cl::Hidden,
30 cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
31 cl::init(false));
32
33static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
34 cl::desc("NVPTX Specific: Disable generation of "
35 "f32x2 instructions and registers."),
36 cl::init(false));
37
38// Pin the vtable to this file.
39void NVPTXSubtarget::anchor() {}
40
42 StringRef FS) {
43 TargetName = std::string(CPU);
44
46
47 // Re-map SM version numbers, SmVersion carries the regular SMs which do
48 // have relative order, while FullSmVersion allows distinguishing sm_90 from
49 // sm_90a, which would *not* be a subset of sm_91.
50 SmVersion = getSmVersion();
51
52 // Set default to PTX 6.0 (CUDA 9.0)
53 if (PTXVersion == 0) {
54 PTXVersion = 60;
55 }
56
57 return *this;
58}
59
// Constructs the NVPTX subtarget from the triple TT, the CPU name CPU, the
// feature string FS and the owning target machine TM.
//
// CPU is forwarded to the generated subtarget base both as the CPU and as the
// tuning CPU. PTXVersion is initialized to 0 and FullSmVersion to 200.
// initializeSubtargetDependencies(CPU, FS) is called as part of constructing
// TLInfo, and its result is what TLInfo is built from. The selection-DAG info
// object is allocated in the constructor body.
60NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
61 const std::string &FS,
62 const NVPTXTargetMachine &TM)
63 : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
64 FullSmVersion(200), SmVersion(getSmVersion()),
65 TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
66 TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
67}
68
70
72 return TSInfo.get();
73}
74
76 return hasFP16Math() && NoF16Math == false;
77}
78
80 return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2;
81}
82
84 if (!hasBF16Math())
85 return false;
86
87 switch (Opcode) {
88 // These BF16 instructions require sm_90 or newer and PTX 7.8 or newer.
89 case ISD::FADD:
90 case ISD::FMUL:
91 case ISD::FSUB:
92 case ISD::SELECT:
93 case ISD::SELECT_CC:
94 case ISD::SETCC:
95 case ISD::FEXP2:
96 case ISD::FCEIL:
97 case ISD::FFLOOR:
98 case ISD::FNEARBYINT:
99 case ISD::FRINT:
100 case ISD::FROUNDEVEN:
101 case ISD::FTRUNC:
102 return getSmVersion() >= 90 && getPTXVersion() >= 78;
103 // These BF16 instructions require sm_80 or newer and PTX 7.0 or newer.
104 case ISD::FMINNUM:
105 case ISD::FMAXNUM:
108 case ISD::FMAXIMUM:
109 case ISD::FMINIMUM:
110 return getSmVersion() >= 80 && getPTXVersion() >= 70;
111 }
112 return true;
113}
114
116 std::string const &FailureMessage) const {
117 if (hasClusters())
118 return;
119
121 "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
122 "Requires SM >= 90 and PTX >= 78.",
123 getFullSmVersion(), PTXVersion, FailureMessage));
124}
static cl::opt< bool > NoF32x2("nvptx-no-f32x2", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of " "f32x2 instructions and registers."), cl::init(false))
static cl::opt< bool > NoF16Math("nvptx-no-f16-math", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of f16 math ops."), cl::init(false))
void failIfClustersUnsupported(std::string const &FailureMessage) const
std::string getTargetName() const
bool hasClusters() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
unsigned getPTXVersion() const
~NVPTXSubtarget() override
bool hasNativeBF16Support(int Opcode) const
unsigned int getFullSmVersion() const
unsigned int getSmVersion() const
bool hasBF16Math() const
bool hasF32x2Instructions() const
bool allowFP16Math() const
NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM)
This constructor initializes the data members to match that of the specified module.
NVPTXSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasFP16Math() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167