// Enumeration type of SimplifyAction::FtzRequirement. The enumerators used in
// this excerpt are FTZ_Any, FTZ_MustBeOn and FTZ_MustBeOff; their declarations
// are not visible here.
133 enum FtzRequirementTy {
// Enumerator identifying the funnel-shift-clamp special case; it is returned
// for nvvm_fshl_clamp / nvvm_fshr_clamp and matched by the switch over
// *Action.Special further down.
// NOTE(review): the sibling enumerator used below is spelled SPC_Reciprocal
// (SPC_ prefix) while this one uses SCP_ -- looks like a transposed prefix;
// confirm against the enum declaration before renaming.
143 SCP_FunnelShiftClamp,
// Describes how a single NVVM intrinsic call should be rewritten. Each of the
// constructors visible in this excerpt fills in one of the optional "target"
// fields (IID, BinaryOp or Special) together with the FTZ requirement.
148 struct SimplifyAction {
// Replacement target: a generic intrinsic ID, a cast opcode, a binary opcode,
// or a special-case tag. An unset optional means that kind of rewrite was not
// selected.
150 std::optional<Intrinsic::ID> IID;
151 std::optional<Instruction::CastOps> CastOp;
152 std::optional<Instruction::BinaryOps> BinaryOp;
153 std::optional<SpecialCase> Special;
// FTZ requirement attached to this action; defaults to FTZ_Any.
155 FtzRequirementTy FtzRequirement = FTZ_Any;
// Set to true by the f16 / bf16 (scalar and x2) entries of the classification
// switch; defaults to false. How it is consumed is not visible in this excerpt.
158 bool IsHalfTy =
false;
// Default construction leaves every optional empty and keeps the defaults
// above.
160 SimplifyAction() =
default;
// Tail of a constructor whose leading parameters are not visible here; it
// initialises IID, FtzRequirement and IsHalfTy (IsHalfTy defaults to false).
163 bool IsHalfTy =
false)
164 : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
// Initialiser list of a constructor whose signature is not visible here; it
// records a binary opcode plus the FTZ requirement.
171 : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
// Special-case rewrite with an explicit FTZ requirement.
173 SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
174 : Special(Special), FtzRequirement(FtzReq) {}
// Action is initialised from a lambda that captures II and returns the
// SimplifyAction selected by II's intrinsic ID. Naming pattern in the table
// below: "_d" entries use FTZ_Any, plain "_f" / f16 / bf16 entries use
// FTZ_MustBeOff, and "_ftz_" entries use FTZ_MustBeOn. The lambda's default
// case and closing lines are not visible in this excerpt.
179 const SimplifyAction Action = [
II]() -> SimplifyAction {
180 switch (
II->getIntrinsicID()) {
// ceil / floor -> Intrinsic::ceil / Intrinsic::floor.
182 case Intrinsic::nvvm_ceil_d:
183 return {Intrinsic::ceil, FTZ_Any};
184 case Intrinsic::nvvm_ceil_f:
185 return {Intrinsic::ceil, FTZ_MustBeOff};
186 case Intrinsic::nvvm_ceil_ftz_f:
187 return {Intrinsic::ceil, FTZ_MustBeOn};
188 case Intrinsic::nvvm_floor_d:
189 return {Intrinsic::floor, FTZ_Any};
190 case Intrinsic::nvvm_floor_f:
191 return {Intrinsic::floor, FTZ_MustBeOff};
192 case Intrinsic::nvvm_floor_ftz_f:
193 return {Intrinsic::floor, FTZ_MustBeOn};
// fma.rn -> Intrinsic::fma. The f16 / bf16 variants (scalar and x2) pass
// `true` as the third initialiser, i.e. IsHalfTy.
194 case Intrinsic::nvvm_fma_rn_d:
195 return {Intrinsic::fma, FTZ_Any};
196 case Intrinsic::nvvm_fma_rn_f:
197 return {Intrinsic::fma, FTZ_MustBeOff};
198 case Intrinsic::nvvm_fma_rn_ftz_f:
199 return {Intrinsic::fma, FTZ_MustBeOn};
200 case Intrinsic::nvvm_fma_rn_f16:
201 return {Intrinsic::fma, FTZ_MustBeOff,
true};
202 case Intrinsic::nvvm_fma_rn_ftz_f16:
203 return {Intrinsic::fma, FTZ_MustBeOn,
true};
204 case Intrinsic::nvvm_fma_rn_f16x2:
205 return {Intrinsic::fma, FTZ_MustBeOff,
true};
206 case Intrinsic::nvvm_fma_rn_ftz_f16x2:
207 return {Intrinsic::fma, FTZ_MustBeOn,
true};
208 case Intrinsic::nvvm_fma_rn_bf16:
209 return {Intrinsic::fma, FTZ_MustBeOff,
true};
210 case Intrinsic::nvvm_fma_rn_ftz_bf16:
211 return {Intrinsic::fma, FTZ_MustBeOn,
true};
212 case Intrinsic::nvvm_fma_rn_bf16x2:
213 return {Intrinsic::fma, FTZ_MustBeOff,
true};
214 case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
215 return {Intrinsic::fma, FTZ_MustBeOn,
true};
// fmax: variants without "_nan" map to Intrinsic::maxnum, "_nan" variants map
// to Intrinsic::maximum. f16 variants set IsHalfTy.
216 case Intrinsic::nvvm_fmax_d:
217 return {Intrinsic::maxnum, FTZ_Any};
218 case Intrinsic::nvvm_fmax_f:
219 return {Intrinsic::maxnum, FTZ_MustBeOff};
220 case Intrinsic::nvvm_fmax_ftz_f:
221 return {Intrinsic::maxnum, FTZ_MustBeOn};
222 case Intrinsic::nvvm_fmax_nan_f:
223 return {Intrinsic::maximum, FTZ_MustBeOff};
224 case Intrinsic::nvvm_fmax_ftz_nan_f:
225 return {Intrinsic::maximum, FTZ_MustBeOn};
226 case Intrinsic::nvvm_fmax_f16:
227 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
228 case Intrinsic::nvvm_fmax_ftz_f16:
229 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
230 case Intrinsic::nvvm_fmax_f16x2:
231 return {Intrinsic::maxnum, FTZ_MustBeOff,
true};
232 case Intrinsic::nvvm_fmax_ftz_f16x2:
233 return {Intrinsic::maxnum, FTZ_MustBeOn,
true};
234 case Intrinsic::nvvm_fmax_nan_f16:
235 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
236 case Intrinsic::nvvm_fmax_ftz_nan_f16:
237 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
238 case Intrinsic::nvvm_fmax_nan_f16x2:
239 return {Intrinsic::maximum, FTZ_MustBeOff,
true};
240 case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
241 return {Intrinsic::maximum, FTZ_MustBeOn,
true};
// fmin: mirrors the fmax table with Intrinsic::minnum / Intrinsic::minimum.
242 case Intrinsic::nvvm_fmin_d:
243 return {Intrinsic::minnum, FTZ_Any};
244 case Intrinsic::nvvm_fmin_f:
245 return {Intrinsic::minnum, FTZ_MustBeOff};
246 case Intrinsic::nvvm_fmin_ftz_f:
247 return {Intrinsic::minnum, FTZ_MustBeOn};
248 case Intrinsic::nvvm_fmin_nan_f:
249 return {Intrinsic::minimum, FTZ_MustBeOff};
250 case Intrinsic::nvvm_fmin_ftz_nan_f:
251 return {Intrinsic::minimum, FTZ_MustBeOn};
252 case Intrinsic::nvvm_fmin_f16:
253 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
254 case Intrinsic::nvvm_fmin_ftz_f16:
255 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
256 case Intrinsic::nvvm_fmin_f16x2:
257 return {Intrinsic::minnum, FTZ_MustBeOff,
true};
258 case Intrinsic::nvvm_fmin_ftz_f16x2:
259 return {Intrinsic::minnum, FTZ_MustBeOn,
true};
260 case Intrinsic::nvvm_fmin_nan_f16:
261 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
262 case Intrinsic::nvvm_fmin_ftz_nan_f16:
263 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
264 case Intrinsic::nvvm_fmin_nan_f16x2:
265 return {Intrinsic::minimum, FTZ_MustBeOff,
true};
266 case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
267 return {Intrinsic::minimum, FTZ_MustBeOn,
true};
// sqrt -> Intrinsic::sqrt.
// NOTE(review): unlike the other "_f" entries, nvvm_sqrt_f uses FTZ_Any rather
// than FTZ_MustBeOff; the rationale is not visible in this excerpt -- confirm
// against the full source before changing it.
268 case Intrinsic::nvvm_sqrt_rn_d:
269 return {Intrinsic::sqrt, FTZ_Any};
270 case Intrinsic::nvvm_sqrt_f:
275 return {Intrinsic::sqrt, FTZ_Any};
// trunc -> Intrinsic::trunc.
276 case Intrinsic::nvvm_trunc_d:
277 return {Intrinsic::trunc, FTZ_Any};
278 case Intrinsic::nvvm_trunc_f:
279 return {Intrinsic::trunc, FTZ_MustBeOff};
280 case Intrinsic::nvvm_trunc_ftz_f:
281 return {Intrinsic::trunc, FTZ_MustBeOn};
// Floating-point -> integer conversions with the "_rz" suffix become plain
// cast opcodes (signed: FPToSI, unsigned: FPToUI). Only the opcode is given;
// presumably the FTZ requirement stays at its FTZ_Any default -- the
// constructor taking a cast opcode is not visible here.
288 case Intrinsic::nvvm_d2i_rz:
289 case Intrinsic::nvvm_f2i_rz:
290 case Intrinsic::nvvm_d2ll_rz:
291 case Intrinsic::nvvm_f2ll_rz:
292 return {Instruction::FPToSI};
293 case Intrinsic::nvvm_d2ui_rz:
294 case Intrinsic::nvvm_f2ui_rz:
295 case Intrinsic::nvvm_d2ull_rz:
296 case Intrinsic::nvvm_f2ull_rz:
297 return {Instruction::FPToUI};
// Integer -> floating-point conversions with the "_rn" suffix become SIToFP
// (signed sources) or UIToFP (unsigned sources).
299 case Intrinsic::nvvm_i2d_rn:
300 case Intrinsic::nvvm_i2f_rn:
301 case Intrinsic::nvvm_ll2d_rn:
302 case Intrinsic::nvvm_ll2f_rn:
303 return {Instruction::SIToFP};
304 case Intrinsic::nvvm_ui2d_rn:
305 case Intrinsic::nvvm_ui2f_rn:
306 case Intrinsic::nvvm_ull2d_rn:
307 case Intrinsic::nvvm_ull2f_rn:
308 return {Instruction::UIToFP};
// Double-precision round-to-nearest division becomes an FDiv binary operator.
311 case Intrinsic::nvvm_div_rn_d:
312 return {Instruction::FDiv, FTZ_Any};
// Double-precision round-to-nearest reciprocal is handled as a special case.
319 case Intrinsic::nvvm_rcp_rn_d:
320 return {SPC_Reciprocal, FTZ_Any};
// Both clamped funnel shifts share one special case; the direction is
// recovered later from the intrinsic ID.
322 case Intrinsic::nvvm_fshl_clamp:
323 case Intrinsic::nvvm_fshr_clamp:
324 return {SCP_FunnelShiftClamp, FTZ_Any};
// FTZ gate: only actions with a specific requirement are checked. The inner
// condition is true when FtzEnabled disagrees with the requirement
// (FTZ_MustBeOn expects FtzEnabled to be true, otherwise false). What is done
// on a mismatch is not visible in this excerpt.
355 if (Action.FtzRequirement != FTZ_Any) {
361 if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
// Type list built from the type of the call's first argument (presumably the
// overload types for the replacement intrinsic -- confirm in the full source).
370 Type *Tys[] = {
II->getArgOperand(0)->getType()};
// Trailing arguments of a call whose callee is not visible here: the second
// operand of II and II's name.
379 II->getArgOperand(1),
II->getName());
// Dispatch on the special-case tag stored in the action.
390 switch (*Action.Special) {
// Reciprocal: arguments for an FDiv whose left operand is the floating-point
// constant 1 in the type of operand 0 and whose right operand is operand 0,
// i.e. 1 / x. The callee is not visible in this excerpt.
394 Instruction::FDiv, ConstantFP::get(
II->getArgOperand(0)->getType(), 1),
395 II->getArgOperand(0),
II->getName());
397 case SCP_FunnelShiftClamp: {
// Direction is derived from which clamp intrinsic is being simplified.
401 const bool IsLeft =
II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp;
// Compares the constant shift amount (zero-extended) against the bit width of
// the call's integer result type; the action taken when it is >= the width is
// not visible in this excerpt.
402 if (ShiftConst->getZExtValue() >=
II->getType()->getIntegerBitWidth())
// Pick the generic funnel-shift intrinsic matching the direction.
405 const unsigned FshIID = IsLeft ? Intrinsic::fshl : Intrinsic::fshr;
// Fragment of a call taking II's module, FshIID and II's result type
// (presumably a declaration lookup for the chosen intrinsic -- confirm).
407 II->getModule(), FshIID,
II->getType()),
// Reached only if a SpecialCase enumerator is not handled by the switch above.
413 llvm_unreachable(
"All SpecialCase enumerators should be handled in switch.");