LLVM 22.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &FirstMI,
39 const MachineInstr &SecondMI, bool IsVOPD3) {
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = FirstMI.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44
45 if (IsVOPD3 && !ST.hasVOPD3())
46 return false;
47 if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
48 return false;
49 if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
50 return false;
51
52 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
53 const MachineRegisterInfo &MRI = MF->getRegInfo();
54 // Literals also count against scalar bus limit
56 auto addLiteral = [&](const MachineOperand &Op) {
57 for (auto &Literal : UniqueLiterals) {
58 if (Literal->isIdenticalTo(Op))
59 return;
60 }
61 UniqueLiterals.push_back(&Op);
62 };
63 SmallVector<Register> UniqueScalarRegs;
64 assert([&]() -> bool {
65 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
66 MII != FirstMI.getParent()->instr_end(); ++MII) {
67 if (&*MII == &SecondMI)
68 return true;
69 }
70 return false;
71 }() && "Expected FirstMI to precede SecondMI");
72 // Cannot pair dependent instructions
73 for (const auto &Use : SecondMI.uses())
74 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
75 return false;
76
77 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
78 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
79 const MachineOperand &Operand = MI.getOperand(OperandIdx);
80 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
81 return Operand.getReg();
82 return Register();
83 };
84
85 auto InstInfo =
86 AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
87
88 for (auto CompIdx : VOPD::COMPONENTS) {
89 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
90
91 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
92 if (Src0.isReg()) {
93 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
94 if (!is_contained(UniqueScalarRegs, Src0.getReg()))
95 UniqueScalarRegs.push_back(Src0.getReg());
96 }
97 } else if (!TII.isInlineConstant(Src0)) {
98 if (IsVOPD3)
99 return false;
100 addLiteral(Src0);
101 }
102
103 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
104 if (IsVOPD3)
105 return false;
106
107 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
108 addLiteral(MI.getOperand(CompOprIdx));
109 }
110 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
111 UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
112
113 if (IsVOPD3) {
114 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
115 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
116 if (!Src)
117 continue;
118 if (OpName == AMDGPU::OpName::src2) {
119 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
120 continue;
121 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
122 UniqueScalarRegs.push_back(Src->getReg());
123 continue;
124 }
125 }
126 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
127 return false;
128 }
129
130 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
131 AMDGPU::OpName::op_sel}) {
132 if (TII.hasModifiersSet(MI, OpName))
133 return false;
134 }
135
136 // Neg is allowed, other modifiers are not. NB: even though sext has the
137 // same value as neg, there are no combinable instructions with sext.
138 for (auto OpName :
139 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
140 AMDGPU::OpName::src2_modifiers}) {
141 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
142 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
143 return false;
144 }
145 }
146 }
147
148 if (UniqueLiterals.size() > 1)
149 return false;
150 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
151 return false;
152
153 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
154 // source-cache.
155 bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
156 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
157 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
158 bool AllowSameVGPR = ST.hasGFX1250Insts();
159
160 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
161 IsVOPD3))
162 return false;
163
164 if (IsVOPD3) {
165 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
166 if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) {
167 const MachineOperand &Src2 =
168 *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
169 if (!Src2.isImm() || Src2.getImm())
170 return false;
171 }
172 if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) {
173 const MachineOperand &Src2 =
174 *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2);
175 if (!Src2.isImm() || Src2.getImm())
176 return false;
177 }
178 }
179
180 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
181 << "\n\tY: " << SecondMI << "\n");
182 return true;
183}
184
185/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
186/// together. Given SecondMI, when FirstMI is unspecified, then check if
187/// SecondMI may be part of a fused pair at all.
189 const TargetSubtargetInfo &TSI,
190 const MachineInstr *FirstMI,
191 const MachineInstr &SecondMI) {
192 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
193 const GCNSubtarget &ST = STII.getSubtarget();
194 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
195 unsigned Opc2 = SecondMI.getOpcode();
196
197 const auto checkVOPD = [&](bool VOPD3) -> bool {
198 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
199
200 // One instruction case
201 if (!FirstMI)
202 return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
203
204 unsigned Opc = FirstMI->getOpcode();
205 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
206
207 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
208 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
209 return false;
210
211 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
212 };
213
214 return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
215}
216
217namespace {
218/// Adapts design from MacroFusion
219/// Puts valid candidate instructions back-to-back so they can easily
220/// be turned into VOPD instructions
221/// Greedily pairs instruction candidates. O(n^2) algorithm.
222struct VOPDPairingMutation : ScheduleDAGMutation {
223 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
224
225 VOPDPairingMutation(
226 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
228
229 void apply(ScheduleDAGInstrs *DAG) override {
230 const TargetInstrInfo &TII = *DAG->TII;
231 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
232 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
233 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
234 return;
235 }
236
237 std::vector<SUnit>::iterator ISUI, JSUI;
238 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
239 const MachineInstr *IMI = ISUI->getInstr();
240 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
241 continue;
242 if (!hasLessThanNumFused(*ISUI, 2))
243 continue;
244
245 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
246 if (JSUI->isBoundaryNode())
247 continue;
248 const MachineInstr *JMI = JSUI->getInstr();
249 if (!hasLessThanNumFused(*JSUI, 2) ||
250 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
251 continue;
252 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
253 break;
254 }
255 }
256 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
257 }
258};
259} // namespace
260
261std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
262 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
263}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This class represents an Operation in the Expression.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:584
mop_range uses()
Returns all operands which may be register uses.
Definition: MachineInstr.h:731
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
const GCNSubtarget & getSubtarget() const
Definition: SIInstrInfo.h:239
A ScheduleDAG for scheduling lists of MachineInstr.
Mutate the DAG as a postpass after normal DAG building.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:584
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:586
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1315
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3)
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
Definition: MacroFusion.cpp:53
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition: MacroFusion.h:36
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Definition: MacroFusion.cpp:46