//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
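
// For example, rename() turns a declaration named "llvm.x86.sse41.ptestc"
// into "llvm.x86.sse41.ptestc.old", freeing the canonical name for the
// upgraded declaration that the helpers below create.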

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
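
// Illustrative IR for the upgrade above (an assumed old-style module): the
// declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to @llvm.x86.sse41.ptestc.old, a fresh declaration taking
// <2 x i64> operands is created, and the old call sites are rewritten
// against it by UpgradeIntrinsicCall.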

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
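
// Illustrative example: @llvm.x86.sse41.insertps is one of the intrinsics
// routed here (see the StringSwitch in upgradeX86IntrinsicFunction), so a
// declaration whose final immediate operand is i32 is upgraded to one whose
// final operand is i8.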

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
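
// Illustrative example (assuming the pre-7.0 form): the 512-bit variant
// @llvm.x86.avx512.mask.cmp.ps.512 used to return its 16 comparison results
// as a scalar integer mask; the upgraded intrinsic returns <16 x i1> instead.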

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
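
// Illustrative example: @llvm.x86.avx512.vpdpbusd.512 once took all three
// operands as <16 x i32>; the upgraded declaration keeps the <16 x i32>
// accumulator but takes the two multiplicand operands as <64 x i8>.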

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
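
// Illustrative example for the two bf16 helpers above: older modules modeled
// bf16 values as integer vectors (e.g. an <8 x i16> return for
// cvtne2ps2bf16.128), whereas the current intrinsics use bfloat vectors for
// the return type (checked by upgradeX86BF16Intrinsic) or for the dot-product
// operands (checked via the second parameter in upgradeX86BF16DPIntrinsic).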

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// iff an upgrade was performed. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
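
// Illustrative example of the Neon-to-generic mapping above: a declaration
//   declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>)
// is re-pointed at the target-independent @llvm.ctlz.v4i32; the extra i1
// "is-zero-poison" operand that ctlz requires is supplied when the calls
// themselves are rewritten.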

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need an upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
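
// Illustrative trailing-operand difference for case (2) above, with the
// leading tensor-map/coordinate arguments elided as "...":
//   old: (..., i64 %ch, i1 %mc_flag, i1 %ch_flag)
//   new: (..., i64 %ch, i1 %mc_flag, i1 %ch_flag, i32 %cta_group_flag)
// In the old form the [N-3]rd parameter is the i64 cache-hint rather than an
// i1, which is exactly what the isIntegerTy(1) probe above detects.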

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}
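
// Illustrative address-space difference for the upgrade above: the old forms
// declared the relevant pointer in the shared address space,
//   ptr addrspace(3) %dst
// while the current intrinsics expect the shared_cluster address space,
//   ptr addrspace(7) %dst
// which is what the getPointerAddressSpace() checks distinguish.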

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
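
// For example, the caller below consumes "ptr.gen.to." from the intrinsic
// name and then uses consumeNVVMPtrAddrSpace() to strip whichever of the
// five NVVM address-space tokens ("local", "shared", "global", "constant",
// "param") follows, before deciding whether to expand the call.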

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
              .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
              .Case("aes32esi", Intrinsic::riscv_aes32esi)
              .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 't':
    if (Name == "thread.pointer") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
1638
1639 auto *ST = dyn_cast<StructType>(F->getReturnType());
1640 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1641 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1642 // Replace return type with literal non-packed struct. Only do this for
1643 // intrinsics declared to return a struct, not for intrinsics with
1644 // overloaded return type, in which case the exact struct type will be
1645 // mangled into the name.
1646 SmallVector<Intrinsic::IITDescriptor> Desc;
1647 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
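// For example, a declaration returning %pair = type { i32, i32 } is recreated
// below to return the literal, non-packed struct { i32, i32 }.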
1648 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1649 auto *FT = F->getFunctionType();
1650 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1651 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1652 std::string Name = F->getName().str();
1653 rename(F);
1654 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1655 Name, F->getParent());
1656
1657 // The new function may also need remangling.
1658 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1659 NewFn = *Result;
1660 return true;
1661 }
1662 }
1663
1664 // Remangle our intrinsic since we upgrade the mangling
1665 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1666 if (Result != std::nullopt) {
1667 NewFn = *Result;
1668 return true;
1669 }
1670
1671 // This may not belong here. This function is effectively being overloaded
1672 // both to detect an intrinsic which needs upgrading and to provide the
1673 // upgraded form of the intrinsic. We should perhaps have two separate
1674 // functions for this.
1675 return false;
1676}
1677
1678bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1679 bool CanUpgradeDebugIntrinsicsToRecords) {
1680 NewFn = nullptr;
1681 bool Upgraded =
1682 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1683
1684 // Upgrade intrinsic attributes. This does not change the function.
1685 if (NewFn)
1686 F = NewFn;
1687 if (Intrinsic::ID id = F->getIntrinsicID()) {
1688 // Only do this if the intrinsic signature is valid.
1689 SmallVector<Type *> OverloadTys;
1690 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1691 F->setAttributes(
1692 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1693 }
1694 return Upgraded;
1695}
1696
1697GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1698 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1699 GV->getName() == "llvm.global_dtors")) ||
1700 !GV->hasInitializer())
1701 return nullptr;
1702 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1703 if (!ATy)
1704 return nullptr;
1705 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1706 if (!STy || STy->getNumElements() != 2)
1707 return nullptr;
1708
1709 LLVMContext &C = GV->getContext();
1710 IRBuilder<> IRB(C);
1711 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1712 IRB.getPtrTy());
1713 Constant *Init = GV->getInitializer();
1714 unsigned N = Init->getNumOperands();
1715 std::vector<Constant *> NewCtors(N);
1716 for (unsigned i = 0; i != N; ++i) {
1717 auto Ctor = cast<Constant>(Init->getOperand(i));
1718 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1719 Ctor->getAggregateElement(1),
1720 Constant::getNullValue(IRB.getPtrTy()));
1721 }
1722 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1723
1724 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1725 NewInit, GV->getName());
1726}
1727
1728// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1729// to byte shuffles.
1730static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1731 unsigned Shift) {
1732 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1733 unsigned NumElts = ResultTy->getNumElements() * 8;
1734
1735 // Bitcast from a 64-bit element type to a byte element type.
1736 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1737 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1738
1739 // We'll be shuffling in zeroes.
1740 Value *Res = Constant::getNullValue(VecTy);
1741
1742 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1743 // we'll just return the zero vector.
1744 if (Shift < 16) {
1745 int Idxs[64];
1746 // 256/512-bit version is split into 2/4 16-byte lanes.
1747 for (unsigned l = 0; l != NumElts; l += 16)
1748 for (unsigned i = 0; i != 16; ++i) {
1749 unsigned Idx = NumElts + i - Shift;
1750 if (Idx < NumElts)
1751 Idx -= NumElts - 16; // end of lane, switch operand.
1752 Idxs[l + i] = Idx + l;
1753 }
1754
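// For example, a 128-bit PSLLDQ with Shift == 4 (NumElts == 16) produces the
// index vector <12, 13, 14, 15, 16, ..., 27>: the first four lanes select
// zeroes from Res and the remaining lanes select bytes 0..11 of Op, i.e. a
// left shift by four bytes with zero fill.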
1755 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1756 }
1757
1758 // Bitcast back to a 64-bit element type.
1759 return Builder.CreateBitCast(Res, ResultTy, "cast");
1760}
1761
1762// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1763// to byte shuffles.
1764static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1765 unsigned Shift) {
1766 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1767 unsigned NumElts = ResultTy->getNumElements() * 8;
1768
1769 // Bitcast from a 64-bit element type to a byte element type.
1770 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1771 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1772
1773 // We'll be shuffling in zeroes.
1774 Value *Res = Constant::getNullValue(VecTy);
1775
1776 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1777 // we'll just return the zero vector.
1778 if (Shift < 16) {
1779 int Idxs[64];
1780 // 256/512-bit version is split into 2/4 16-byte lanes.
1781 for (unsigned l = 0; l != NumElts; l += 16)
1782 for (unsigned i = 0; i != 16; ++i) {
1783 unsigned Idx = i + Shift;
1784 if (Idx >= 16)
1785 Idx += NumElts - 16; // end of lane, switch operand.
1786 Idxs[l + i] = Idx + l;
1787 }
1788
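// For example, a 128-bit PSRLDQ with Shift == 4 (NumElts == 16) produces the
// index vector <4, 5, ..., 15, 16, 17, 18, 19>: lanes 0..11 select bytes
// 4..15 of Op and the last four lanes select zeroes from Res.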
1789 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1790 }
1791
1792 // Bitcast back to a 64-bit element type.
1793 return Builder.CreateBitCast(Res, ResultTy, "cast");
1794}
1795
1796static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1797 unsigned NumElts) {
1798 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1799 llvm::VectorType *MaskTy = llvm::VectorType::get(
1800 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1801 Mask = Builder.CreateBitCast(Mask, MaskTy);
1802
1803 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1804 // i8 and we need to extract down to the right number of elements.
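// For example, a v4i32 operation has NumElts == 4: the i8 mask is bitcast to
// v8i1 and the shuffle below keeps only lanes <0, 1, 2, 3>.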
1805 if (NumElts <= 4) {
1806 int Indices[4];
1807 for (unsigned i = 0; i != NumElts; ++i)
1808 Indices[i] = i;
1809 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1810 "extract");
1811 }
1812
1813 return Mask;
1814}
1815
1816static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1817 Value *Op1) {
1818 // If the mask is all ones just emit the first operation.
1819 if (const auto *C = dyn_cast<Constant>(Mask))
1820 if (C->isAllOnesValue())
1821 return Op0;
1822
1823 Mask = getX86MaskVec(Builder, Mask,
1824 cast<FixedVectorType>(Op0->getType())->getNumElements());
1825 return Builder.CreateSelect(Mask, Op0, Op1);
1826}
1827
1828static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1829 Value *Op1) {
1830 // If the mask is all ones just emit the first operation.
1831 if (const auto *C = dyn_cast<Constant>(Mask))
1832 if (C->isAllOnesValue())
1833 return Op0;
1834
1835 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1836 Mask->getType()->getIntegerBitWidth());
1837 Mask = Builder.CreateBitCast(Mask, MaskTy);
1838 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1839 return Builder.CreateSelect(Mask, Op0, Op1);
1840}
1841
1842// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1843// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1844// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1845static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1846 Value *Op1, Value *Shift,
1847 Value *Passthru, Value *Mask,
1848 bool IsVALIGN) {
1849 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1850
1851 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1852 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1853 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1854 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1855
1856 // Mask the immediate for VALIGN.
1857 if (IsVALIGN)
1858 ShiftVal &= (NumElts - 1);
1859
1860 // If palignr is shifting the pair of vectors more than the size of two
1861 // lanes, emit zero.
1862 if (ShiftVal >= 32)
1863 return llvm::Constant::getNullValue(Op0->getType());
1864
1865 // If palignr is shifting the pair of input vectors more than one lane,
1866 // but less than two lanes, convert to shifting in zeroes.
1867 if (ShiftVal > 16) {
1868 ShiftVal -= 16;
1869 Op1 = Op0;
1870 Op0 = llvm::Constant::getNullValue(Op0->getType());
1871 }
1872
1873 int Indices[64];
1874 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1875 for (unsigned l = 0; l < NumElts; l += 16) {
1876 for (unsigned i = 0; i != 16; ++i) {
1877 unsigned Idx = ShiftVal + i;
1878 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1879 Idx += NumElts - 16; // End of lane, switch operand.
1880 Indices[l + i] = Idx + l;
1881 }
1882 }
1883
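// For example, 128-bit palignr with ShiftVal == 4 (NumElts == 16) produces
// indices <4, ..., 15, 16, 17, 18, 19>: bytes 4..15 of Op1 followed by bytes
// 0..3 of Op0, i.e. the concatenation Op0:Op1 shifted right by four bytes.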
1884 Value *Align = Builder.CreateShuffleVector(
1885 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1886
1887 return emitX86Select(Builder, Mask, Align, Passthru);
1888}
1889
1890static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1891 bool ZeroMask, bool IndexForm) {
1892 Type *Ty = CI.getType();
1893 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1894 unsigned EltWidth = Ty->getScalarSizeInBits();
1895 bool IsFloat = Ty->isFPOrFPVectorTy();
1896 Intrinsic::ID IID;
1897 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1898 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1899 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1900 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1901 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1902 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1903 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1904 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1905 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1906 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1907 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1908 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1909 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1910 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1911 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1912 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1913 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1914 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1915 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1916 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1917 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1918 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1919 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1920 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1921 else if (VecWidth == 128 && EltWidth == 16)
1922 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1923 else if (VecWidth == 256 && EltWidth == 16)
1924 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1925 else if (VecWidth == 512 && EltWidth == 16)
1926 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1927 else if (VecWidth == 128 && EltWidth == 8)
1928 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1929 else if (VecWidth == 256 && EltWidth == 8)
1930 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1931 else if (VecWidth == 512 && EltWidth == 8)
1932 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1933 else
1934 llvm_unreachable("Unexpected intrinsic");
1935
1936 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1937 CI.getArgOperand(2) };
1938
1939 // If this isn't index form we need to swap operands 0 and 1.
1940 if (!IndexForm)
1941 std::swap(Args[0], Args[1]);
1942
1943 Value *V = Builder.CreateIntrinsic(IID, Args);
1944 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1945 : Builder.CreateBitCast(CI.getArgOperand(1),
1946 Ty);
1947 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1948}
1949
1950static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1951 Intrinsic::ID IID) {
1952 Type *Ty = CI.getType();
1953 Value *Op0 = CI.getOperand(0);
1954 Value *Op1 = CI.getOperand(1);
1955 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1956
1957 if (CI.arg_size() == 4) { // For masked intrinsics.
1958 Value *VecSrc = CI.getOperand(2);
1959 Value *Mask = CI.getOperand(3);
1960 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1961 }
1962 return Res;
1963}
1964
1965static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1966 bool IsRotateRight) {
1967 Type *Ty = CI.getType();
1968 Value *Src = CI.getArgOperand(0);
1969 Value *Amt = CI.getArgOperand(1);
1970
1971 // The amount may be a scalar immediate, in which case we create a splat vector.
1972 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1973 // we only care about the lowest log2 bits anyway.
1974 if (Amt->getType() != Ty) {
1975 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1976 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1977 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1978 }
1979
1980 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1981 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1982
1983 if (CI.arg_size() == 4) { // For masked intrinsics.
1984 Value *VecSrc = CI.getOperand(2);
1985 Value *Mask = CI.getOperand(3);
1986 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1987 }
1988 return Res;
1989}
1990
1991static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1992 bool IsSigned) {
1993 Type *Ty = CI.getType();
1994 Value *LHS = CI.getArgOperand(0);
1995 Value *RHS = CI.getArgOperand(1);
1996
1997 CmpInst::Predicate Pred;
1998 switch (Imm) {
1999 case 0x0:
2000 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2001 break;
2002 case 0x1:
2003 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2004 break;
2005 case 0x2:
2006 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2007 break;
2008 case 0x3:
2009 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2010 break;
2011 case 0x4:
2012 Pred = ICmpInst::ICMP_EQ;
2013 break;
2014 case 0x5:
2015 Pred = ICmpInst::ICMP_NE;
2016 break;
2017 case 0x6:
2018 return Constant::getNullValue(Ty); // FALSE
2019 case 0x7:
2020 return Constant::getAllOnesValue(Ty); // TRUE
2021 default:
2022 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2023 }
2024
2025 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2026 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2027 return Ext;
2028}
2029
2030static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2031 bool IsShiftRight, bool ZeroMask) {
2032 Type *Ty = CI.getType();
2033 Value *Op0 = CI.getArgOperand(0);
2034 Value *Op1 = CI.getArgOperand(1);
2035 Value *Amt = CI.getArgOperand(2);
2036
2037 if (IsShiftRight)
2038 std::swap(Op0, Op1);
2039
2040 // The amount may be a scalar immediate, in which case we create a splat vector.
2041 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2042 // we only care about the lowest log2 bits anyway.
2043 if (Amt->getType() != Ty) {
2044 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2045 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2046 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2047 }
2048
2049 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2050 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2051
2052 unsigned NumArgs = CI.arg_size();
2053 if (NumArgs >= 4) { // For masked intrinsics.
2054 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2055 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2056 CI.getArgOperand(0);
2057 Value *Mask = CI.getOperand(NumArgs - 1);
2058 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2059 }
2060 return Res;
2061}
2062
2063static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Data, Value *Ptr,
2064 Value *Mask, bool Aligned) {
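// The aligned intrinsic forms imply the vector's natural alignment, e.g.
// Align(64) for a 512-bit store; the unaligned forms use Align(1).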
2065 const Align Alignment =
2066 Aligned
2067 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2068 : Align(1);
2069
2070 // If the mask is all ones just emit a regular store.
2071 if (const auto *C = dyn_cast<Constant>(Mask))
2072 if (C->isAllOnesValue())
2073 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2074
2075 // Convert the mask from an integer type to a vector of i1.
2076 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2077 Mask = getX86MaskVec(Builder, Mask, NumElts);
2078 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2079}
2080
2081static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2082 Value *Passthru, Value *Mask, bool Aligned) {
2083 Type *ValTy = Passthru->getType();
2084 const Align Alignment =
2085 Aligned
2086 ? Align(
2087 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2088 8)
2089 : Align(1);
2090
2091 // If the mask is all ones just emit a regular load.
2092 if (const auto *C = dyn_cast<Constant>(Mask))
2093 if (C->isAllOnesValue())
2094 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2095
2096 // Convert the mask from an integer type to a vector of i1.
2097 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2098 Mask = getX86MaskVec(Builder, Mask, NumElts);
2099 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2100}
2101
2102static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2103 Type *Ty = CI.getType();
2104 Value *Op0 = CI.getArgOperand(0);
2105 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2106 {Op0, Builder.getInt1(false)});
2107 if (CI.arg_size() == 3)
2108 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2109 return Res;
2110}
2111
2112static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2113 Type *Ty = CI.getType();
2114
2115 // Arguments have a vXi32 type so cast to vXi64.
2116 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2117 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2118
2119 if (IsSigned) {
2120 // Shift left then arithmetic shift right.
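// Shifting each 64-bit lane left by 32 and then arithmetically right by 32
// sign-extends the low 32-bit half in place.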
2121 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2122 LHS = Builder.CreateShl(LHS, ShiftAmt);
2123 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2124 RHS = Builder.CreateShl(RHS, ShiftAmt);
2125 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2126 } else {
2127 // Clear the upper bits.
2128 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2129 LHS = Builder.CreateAnd(LHS, Mask);
2130 RHS = Builder.CreateAnd(RHS, Mask);
2131 }
2132
2133 Value *Res = Builder.CreateMul(LHS, RHS);
2134
2135 if (CI.arg_size() == 4)
2136 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2137
2138 return Res;
2139}
2140
2141// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2142static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2143 Value *Mask) {
2144 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2145 if (Mask) {
2146 const auto *C = dyn_cast<Constant>(Mask);
2147 if (!C || !C->isAllOnesValue())
2148 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2149 }
2150
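// Pad narrow results (1, 2 or 4 elements) out to eight lanes with zeroes
// taken from the null second shuffle operand so the bitcast below produces
// at least an i8.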
2151 if (NumElts < 8) {
2152 int Indices[8];
2153 for (unsigned i = 0; i != NumElts; ++i)
2154 Indices[i] = i;
2155 for (unsigned i = NumElts; i != 8; ++i)
2156 Indices[i] = NumElts + i % NumElts;
2157 Vec = Builder.CreateShuffleVector(Vec,
2158 Constant::getNullValue(Vec->getType()),
2159 Indices);
2160 }
2161 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2162}
2163
2164static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2165 unsigned CC, bool Signed) {
2166 Value *Op0 = CI.getArgOperand(0);
2167 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2168
2169 Value *Cmp;
2170 if (CC == 3) {
2171 Cmp = Constant::getNullValue(
2172 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2173 } else if (CC == 7) {
2174 Cmp = Constant::getAllOnesValue(
2175 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2176 } else {
2177 ICmpInst::Predicate Pred;
2178 switch (CC) {
2179 default: llvm_unreachable("Unknown condition code");
2180 case 0: Pred = ICmpInst::ICMP_EQ; break;
2181 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2182 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2183 case 4: Pred = ICmpInst::ICMP_NE; break;
2184 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2185 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2186 }
2187 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2188 }
2189
2190 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2191
2192 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2193}
2194
2195// Replace a masked intrinsic with an older unmasked intrinsic.
2196static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2197 Intrinsic::ID IID) {
2198 Value *Rep =
2199 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2200 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2201}
2202
2203static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2204 Value* A = CI.getArgOperand(0);
2205 Value* B = CI.getArgOperand(1);
2206 Value* Src = CI.getArgOperand(2);
2207 Value* Mask = CI.getArgOperand(3);
2208
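// Bit 0 of the mask chooses between lane 0 of B and lane 0 of Src; the
// result keeps the upper lanes of A unchanged.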
2209 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2210 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2211 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2212 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2213 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2214 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2215}
2216
2217static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2218 Value* Op = CI.getArgOperand(0);
2219 Type* ReturnOp = CI.getType();
2220 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2221 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2222 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2223}
2224
2225// Replace the intrinsic with an unmasked version and a select.
2226static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2227 CallBase &CI, Value *&Rep) {
2228 Name = Name.substr(12); // Remove avx512.mask.
2229
2230 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2231 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2232 Intrinsic::ID IID;
2233 if (Name.starts_with("max.p")) {
2234 if (VecWidth == 128 && EltWidth == 32)
2235 IID = Intrinsic::x86_sse_max_ps;
2236 else if (VecWidth == 128 && EltWidth == 64)
2237 IID = Intrinsic::x86_sse2_max_pd;
2238 else if (VecWidth == 256 && EltWidth == 32)
2239 IID = Intrinsic::x86_avx_max_ps_256;
2240 else if (VecWidth == 256 && EltWidth == 64)
2241 IID = Intrinsic::x86_avx_max_pd_256;
2242 else
2243 llvm_unreachable("Unexpected intrinsic");
2244 } else if (Name.starts_with("min.p")) {
2245 if (VecWidth == 128 && EltWidth == 32)
2246 IID = Intrinsic::x86_sse_min_ps;
2247 else if (VecWidth == 128 && EltWidth == 64)
2248 IID = Intrinsic::x86_sse2_min_pd;
2249 else if (VecWidth == 256 && EltWidth == 32)
2250 IID = Intrinsic::x86_avx_min_ps_256;
2251 else if (VecWidth == 256 && EltWidth == 64)
2252 IID = Intrinsic::x86_avx_min_pd_256;
2253 else
2254 llvm_unreachable("Unexpected intrinsic");
2255 } else if (Name.starts_with("pshuf.b.")) {
2256 if (VecWidth == 128)
2257 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2258 else if (VecWidth == 256)
2259 IID = Intrinsic::x86_avx2_pshuf_b;
2260 else if (VecWidth == 512)
2261 IID = Intrinsic::x86_avx512_pshuf_b_512;
2262 else
2263 llvm_unreachable("Unexpected intrinsic");
2264 } else if (Name.starts_with("pmul.hr.sw.")) {
2265 if (VecWidth == 128)
2266 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2267 else if (VecWidth == 256)
2268 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2269 else if (VecWidth == 512)
2270 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2271 else
2272 llvm_unreachable("Unexpected intrinsic");
2273 } else if (Name.starts_with("pmulh.w.")) {
2274 if (VecWidth == 128)
2275 IID = Intrinsic::x86_sse2_pmulh_w;
2276 else if (VecWidth == 256)
2277 IID = Intrinsic::x86_avx2_pmulh_w;
2278 else if (VecWidth == 512)
2279 IID = Intrinsic::x86_avx512_pmulh_w_512;
2280 else
2281 llvm_unreachable("Unexpected intrinsic");
2282 } else if (Name.starts_with("pmulhu.w.")) {
2283 if (VecWidth == 128)
2284 IID = Intrinsic::x86_sse2_pmulhu_w;
2285 else if (VecWidth == 256)
2286 IID = Intrinsic::x86_avx2_pmulhu_w;
2287 else if (VecWidth == 512)
2288 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2289 else
2290 llvm_unreachable("Unexpected intrinsic");
2291 } else if (Name.starts_with("pmaddw.d.")) {
2292 if (VecWidth == 128)
2293 IID = Intrinsic::x86_sse2_pmadd_wd;
2294 else if (VecWidth == 256)
2295 IID = Intrinsic::x86_avx2_pmadd_wd;
2296 else if (VecWidth == 512)
2297 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2298 else
2299 llvm_unreachable("Unexpected intrinsic");
2300 } else if (Name.starts_with("pmaddubs.w.")) {
2301 if (VecWidth == 128)
2302 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2303 else if (VecWidth == 256)
2304 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2305 else if (VecWidth == 512)
2306 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2307 else
2308 llvm_unreachable("Unexpected intrinsic");
2309 } else if (Name.starts_with("packsswb.")) {
2310 if (VecWidth == 128)
2311 IID = Intrinsic::x86_sse2_packsswb_128;
2312 else if (VecWidth == 256)
2313 IID = Intrinsic::x86_avx2_packsswb;
2314 else if (VecWidth == 512)
2315 IID = Intrinsic::x86_avx512_packsswb_512;
2316 else
2317 llvm_unreachable("Unexpected intrinsic");
2318 } else if (Name.starts_with("packssdw.")) {
2319 if (VecWidth == 128)
2320 IID = Intrinsic::x86_sse2_packssdw_128;
2321 else if (VecWidth == 256)
2322 IID = Intrinsic::x86_avx2_packssdw;
2323 else if (VecWidth == 512)
2324 IID = Intrinsic::x86_avx512_packssdw_512;
2325 else
2326 llvm_unreachable("Unexpected intrinsic");
2327 } else if (Name.starts_with("packuswb.")) {
2328 if (VecWidth == 128)
2329 IID = Intrinsic::x86_sse2_packuswb_128;
2330 else if (VecWidth == 256)
2331 IID = Intrinsic::x86_avx2_packuswb;
2332 else if (VecWidth == 512)
2333 IID = Intrinsic::x86_avx512_packuswb_512;
2334 else
2335 llvm_unreachable("Unexpected intrinsic");
2336 } else if (Name.starts_with("packusdw.")) {
2337 if (VecWidth == 128)
2338 IID = Intrinsic::x86_sse41_packusdw;
2339 else if (VecWidth == 256)
2340 IID = Intrinsic::x86_avx2_packusdw;
2341 else if (VecWidth == 512)
2342 IID = Intrinsic::x86_avx512_packusdw_512;
2343 else
2344 llvm_unreachable("Unexpected intrinsic");
2345 } else if (Name.starts_with("vpermilvar.")) {
2346 if (VecWidth == 128 && EltWidth == 32)
2347 IID = Intrinsic::x86_avx_vpermilvar_ps;
2348 else if (VecWidth == 128 && EltWidth == 64)
2349 IID = Intrinsic::x86_avx_vpermilvar_pd;
2350 else if (VecWidth == 256 && EltWidth == 32)
2351 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2352 else if (VecWidth == 256 && EltWidth == 64)
2353 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2354 else if (VecWidth == 512 && EltWidth == 32)
2355 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2356 else if (VecWidth == 512 && EltWidth == 64)
2357 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2358 else
2359 llvm_unreachable("Unexpected intrinsic");
2360 } else if (Name == "cvtpd2dq.256") {
2361 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2362 } else if (Name == "cvtpd2ps.256") {
2363 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2364 } else if (Name == "cvttpd2dq.256") {
2365 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2366 } else if (Name == "cvttps2dq.128") {
2367 IID = Intrinsic::x86_sse2_cvttps2dq;
2368 } else if (Name == "cvttps2dq.256") {
2369 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2370 } else if (Name.starts_with("permvar.")) {
2371 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2372 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2373 IID = Intrinsic::x86_avx2_permps;
2374 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2375 IID = Intrinsic::x86_avx2_permd;
2376 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2377 IID = Intrinsic::x86_avx512_permvar_df_256;
2378 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2379 IID = Intrinsic::x86_avx512_permvar_di_256;
2380 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2381 IID = Intrinsic::x86_avx512_permvar_sf_512;
2382 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2383 IID = Intrinsic::x86_avx512_permvar_si_512;
2384 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2385 IID = Intrinsic::x86_avx512_permvar_df_512;
2386 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2387 IID = Intrinsic::x86_avx512_permvar_di_512;
2388 else if (VecWidth == 128 && EltWidth == 16)
2389 IID = Intrinsic::x86_avx512_permvar_hi_128;
2390 else if (VecWidth == 256 && EltWidth == 16)
2391 IID = Intrinsic::x86_avx512_permvar_hi_256;
2392 else if (VecWidth == 512 && EltWidth == 16)
2393 IID = Intrinsic::x86_avx512_permvar_hi_512;
2394 else if (VecWidth == 128 && EltWidth == 8)
2395 IID = Intrinsic::x86_avx512_permvar_qi_128;
2396 else if (VecWidth == 256 && EltWidth == 8)
2397 IID = Intrinsic::x86_avx512_permvar_qi_256;
2398 else if (VecWidth == 512 && EltWidth == 8)
2399 IID = Intrinsic::x86_avx512_permvar_qi_512;
2400 else
2401 llvm_unreachable("Unexpected intrinsic");
2402 } else if (Name.starts_with("dbpsadbw.")) {
2403 if (VecWidth == 128)
2404 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2405 else if (VecWidth == 256)
2406 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2407 else if (VecWidth == 512)
2408 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2409 else
2410 llvm_unreachable("Unexpected intrinsic");
2411 } else if (Name.starts_with("pmultishift.qb.")) {
2412 if (VecWidth == 128)
2413 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2414 else if (VecWidth == 256)
2415 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2416 else if (VecWidth == 512)
2417 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2418 else
2419 llvm_unreachable("Unexpected intrinsic");
2420 } else if (Name.starts_with("conflict.")) {
2421 if (Name[9] == 'd' && VecWidth == 128)
2422 IID = Intrinsic::x86_avx512_conflict_d_128;
2423 else if (Name[9] == 'd' && VecWidth == 256)
2424 IID = Intrinsic::x86_avx512_conflict_d_256;
2425 else if (Name[9] == 'd' && VecWidth == 512)
2426 IID = Intrinsic::x86_avx512_conflict_d_512;
2427 else if (Name[9] == 'q' && VecWidth == 128)
2428 IID = Intrinsic::x86_avx512_conflict_q_128;
2429 else if (Name[9] == 'q' && VecWidth == 256)
2430 IID = Intrinsic::x86_avx512_conflict_q_256;
2431 else if (Name[9] == 'q' && VecWidth == 512)
2432 IID = Intrinsic::x86_avx512_conflict_q_512;
2433 else
2434 llvm_unreachable("Unexpected intrinsic");
2435 } else if (Name.starts_with("pavg.")) {
2436 if (Name[5] == 'b' && VecWidth == 128)
2437 IID = Intrinsic::x86_sse2_pavg_b;
2438 else if (Name[5] == 'b' && VecWidth == 256)
2439 IID = Intrinsic::x86_avx2_pavg_b;
2440 else if (Name[5] == 'b' && VecWidth == 512)
2441 IID = Intrinsic::x86_avx512_pavg_b_512;
2442 else if (Name[5] == 'w' && VecWidth == 128)
2443 IID = Intrinsic::x86_sse2_pavg_w;
2444 else if (Name[5] == 'w' && VecWidth == 256)
2445 IID = Intrinsic::x86_avx2_pavg_w;
2446 else if (Name[5] == 'w' && VecWidth == 512)
2447 IID = Intrinsic::x86_avx512_pavg_w_512;
2448 else
2449 llvm_unreachable("Unexpected intrinsic");
2450 } else
2451 return false;
2452
2453 SmallVector<Value *, 4> Args(CI.args());
2454 Args.pop_back();
2455 Args.pop_back();
2456 Rep = Builder.CreateIntrinsic(IID, Args);
2457 unsigned NumArgs = CI.arg_size();
2458 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2459 CI.getArgOperand(NumArgs - 2));
2460 return true;
2461}
2462
2463/// Upgrade the comment in a call to inline asm that represents an ObjC
2464/// retain/release marker.
2465void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2466 size_t Pos;
2467 if (AsmStr->find("mov\tfp") == 0 &&
2468 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2469 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2470 AsmStr->replace(Pos, 1, ";");
2471 }
2472}
2473
2474static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2475 Function *F, IRBuilder<> &Builder) {
2476 Value *Rep = nullptr;
2477
2478 if (Name == "abs.i" || Name == "abs.ll") {
2479 Value *Arg = CI->getArgOperand(0);
2480 Value *Neg = Builder.CreateNeg(Arg, "neg");
2481 Value *Cmp = Builder.CreateICmpSGE(
2482 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2483 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2484 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2485 Type *Ty = (Name == "abs.bf16")
2486 ? Builder.getBFloatTy()
2487 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2488 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2489 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2490 Rep = Builder.CreateBitCast(Abs, CI->getType());
2491 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2492 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2493 : Intrinsic::nvvm_fabs;
2494 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2495 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2496 Name.starts_with("atomic.load.add.f64.p")) {
2497 Value *Ptr = CI->getArgOperand(0);
2498 Value *Val = CI->getArgOperand(1);
2499 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2500 AtomicOrdering::SequentiallyConsistent);
2501 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2502 Name.starts_with("atomic.load.dec.32.p")) {
2503 Value *Ptr = CI->getArgOperand(0);
2504 Value *Val = CI->getArgOperand(1);
2505 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2506 : AtomicRMWInst::UDecWrap;
2507 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2508 AtomicOrdering::SequentiallyConsistent);
2509 } else if (Name.consume_front("max.") &&
2510 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2511 Name == "ui" || Name == "ull")) {
2512 Value *Arg0 = CI->getArgOperand(0);
2513 Value *Arg1 = CI->getArgOperand(1);
2514 Value *Cmp = Name.starts_with("u")
2515 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2516 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2517 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2518 } else if (Name.consume_front("min.") &&
2519 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2520 Name == "ui" || Name == "ull")) {
2521 Value *Arg0 = CI->getArgOperand(0);
2522 Value *Arg1 = CI->getArgOperand(1);
2523 Value *Cmp = Name.starts_with("u")
2524 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2525 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2526 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2527 } else if (Name == "clz.ll") {
2528 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2529 Value *Arg = CI->getArgOperand(0);
2530 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2531 {Arg, Builder.getFalse()},
2532 /*FMFSource=*/nullptr, "ctlz");
2533 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2534 } else if (Name == "popc.ll") {
2535 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2536 // i64.
2537 Value *Arg = CI->getArgOperand(0);
2538 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2539 Arg, /*FMFSource=*/nullptr, "ctpop");
2540 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2541 } else if (Name == "h2f") {
2542 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2543 {Builder.getFloatTy()}, CI->getArgOperand(0),
2544 /*FMFSource=*/nullptr, "h2f");
2545 } else if (Name.consume_front("bitcast.") &&
2546 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2547 Name == "d2ll")) {
2548 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2549 } else if (Name == "rotate.b32") {
2550 Value *Arg = CI->getOperand(0);
2551 Value *ShiftAmt = CI->getOperand(1);
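// A funnel shift with both inputs equal is a rotate: fshl(x, x, n) == rotl(x, n).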
2552 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2553 {Arg, Arg, ShiftAmt});
2554 } else if (Name == "rotate.b64") {
2555 Type *Int64Ty = Builder.getInt64Ty();
2556 Value *Arg = CI->getOperand(0);
2557 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2558 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2559 {Arg, Arg, ZExtShiftAmt});
2560 } else if (Name == "rotate.right.b64") {
2561 Type *Int64Ty = Builder.getInt64Ty();
2562 Value *Arg = CI->getOperand(0);
2563 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2564 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2565 {Arg, Arg, ZExtShiftAmt});
2566 } else if (Name == "swap.lo.hi.b64") {
2567 Type *Int64Ty = Builder.getInt64Ty();
2568 Value *Arg = CI->getOperand(0);
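// Funnel-shifting a 64-bit value by 32 rotates it by half its width,
// swapping the two 32-bit halves.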
2569 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2570 {Arg, Arg, Builder.getInt64(32)});
2571 } else if ((Name.consume_front("ptr.gen.to.") &&
2572 consumeNVVMPtrAddrSpace(Name)) ||
2573 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2574 Name.starts_with(".to.gen"))) {
2575 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2576 } else if (Name.consume_front("ldg.global")) {
2577 Value *Ptr = CI->getArgOperand(0);
2578 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2579 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2580 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2581 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2582 MDNode *MD = MDNode::get(Builder.getContext(), {});
2583 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2584 return LD;
2585 } else if (Name == "tanh.approx.f32") {
2586 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2587 FastMathFlags FMF;
2588 FMF.setApproxFunc();
2589 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2590 FMF);
2591 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2592 Value *Arg =
2593 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2594 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2595 {}, {Arg});
2596 } else if (Name == "barrier") {
2597 Rep = Builder.CreateIntrinsic(
2598 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2599 {CI->getArgOperand(0), CI->getArgOperand(1)});
2600 } else if (Name == "barrier.sync") {
2601 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2602 {CI->getArgOperand(0)});
2603 } else if (Name == "barrier.sync.cnt") {
2604 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2605 {CI->getArgOperand(0), CI->getArgOperand(1)});
2606 } else {
2607 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2608 if (IID != Intrinsic::not_intrinsic &&
2609 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2610 rename(F);
2611 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2612 SmallVector<Value *, 2> Args;
2613 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2614 Value *Arg = CI->getArgOperand(I);
2615 Type *OldType = Arg->getType();
2616 Type *NewType = NewFn->getArg(I)->getType();
2617 Args.push_back(
2618 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2619 ? Builder.CreateBitCast(Arg, NewType)
2620 : Arg);
2621 }
2622 Rep = Builder.CreateCall(NewFn, Args);
2623 if (F->getReturnType()->isIntegerTy())
2624 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2625 }
2626 }
2627
2628 return Rep;
2629}
2630
2631static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2632 IRBuilder<> &Builder) {
2633 LLVMContext &C = F->getContext();
2634 Value *Rep = nullptr;
2635
2636 if (Name.starts_with("sse4a.movnt.")) {
2637 SmallVector<Metadata *, 1> Elts;
2638 Elts.push_back(
2639 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2640 MDNode *Node = MDNode::get(C, Elts);
2641
2642 Value *Arg0 = CI->getArgOperand(0);
2643 Value *Arg1 = CI->getArgOperand(1);
2644
2645 // Nontemporal (unaligned) store of the 0'th element of the float/double
2646 // vector.
2647 Value *Extract =
2648 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2649
2650 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2651 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2652 } else if (Name.starts_with("avx.movnt.") ||
2653 Name.starts_with("avx512.storent.")) {
2654 SmallVector<Metadata *, 1> Elts;
2655 Elts.push_back(
2656 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2657 MDNode *Node = MDNode::get(C, Elts);
2658
2659 Value *Arg0 = CI->getArgOperand(0);
2660 Value *Arg1 = CI->getArgOperand(1);
2661
2662 StoreInst *SI = Builder.CreateAlignedStore(
2663 Arg1, Arg0,
2664 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2665 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2666 } else if (Name == "sse2.storel.dq") {
2667 Value *Arg0 = CI->getArgOperand(0);
2668 Value *Arg1 = CI->getArgOperand(1);
2669
2670 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2671 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2672 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2673 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2674 } else if (Name.starts_with("sse.storeu.") ||
2675 Name.starts_with("sse2.storeu.") ||
2676 Name.starts_with("avx.storeu.")) {
2677 Value *Arg0 = CI->getArgOperand(0);
2678 Value *Arg1 = CI->getArgOperand(1);
2679 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2680 } else if (Name == "avx512.mask.store.ss") {
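// Only bit 0 of the mask is meaningful for the scalar .ss store, so clear
// the remaining bits before forming the mask vector.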
2681 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2682 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2683 Mask, false);
2684 } else if (Name.starts_with("avx512.mask.store")) {
2685 // "avx512.mask.storeu." or "avx512.mask.store."
2686 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2687 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2688 CI->getArgOperand(2), Aligned);
2689 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2690 // Upgrade packed integer vector compare intrinsics to compare instructions.
2691 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2692 bool CmpEq = Name[9] == 'e';
2693 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2694 CI->getArgOperand(0), CI->getArgOperand(1));
2695 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2696 } else if (Name.starts_with("avx512.broadcastm")) {
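// broadcastmb2q zero-extends an i8 mask to i64 and broadcastmw2d an i16 mask
// to i32; the extended value is then splatted across every lane.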
2697 Type *ExtTy = Type::getInt32Ty(C);
2698 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2699 ExtTy = Type::getInt64Ty(C);
2700 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2701 ExtTy->getPrimitiveSizeInBits();
2702 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2703 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2704 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2705 Value *Vec = CI->getArgOperand(0);
2706 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2707 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2708 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2709 } else if (Name.starts_with("avx.sqrt.p") ||
2710 Name.starts_with("sse2.sqrt.p") ||
2711 Name.starts_with("sse.sqrt.p")) {
2712 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2713 {CI->getArgOperand(0)});
2714 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2715 if (CI->arg_size() == 4 &&
2716 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2717 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2718 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2719 : Intrinsic::x86_avx512_sqrt_pd_512;
2720
2721 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2722 Rep = Builder.CreateIntrinsic(IID, Args);
2723 } else {
2724 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2725 {CI->getArgOperand(0)});
2726 }
2727 Rep =
2728 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2729 } else if (Name.starts_with("avx512.ptestm") ||
2730 Name.starts_with("avx512.ptestnm")) {
2731 Value *Op0 = CI->getArgOperand(0);
2732 Value *Op1 = CI->getArgOperand(1);
2733 Value *Mask = CI->getArgOperand(2);
2734 Rep = Builder.CreateAnd(Op0, Op1);
2735 llvm::Type *Ty = Op0->getType();
2736 Constant *Zero = llvm::Constant::getNullValue(Ty);
2737 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2738 ? ICmpInst::ICMP_NE
2739 : ICmpInst::ICMP_EQ;
2740 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2741 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2742 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2743 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2744 ->getNumElements();
2745 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2746 Rep =
2747 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2748 } else if (Name.starts_with("avx512.kunpck")) {
2749 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2750 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2751 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2752 int Indices[64];
2753 for (unsigned i = 0; i != NumElts; ++i)
2754 Indices[i] = i;
2755
2756 // First extract half of each vector. This gives better codegen than
2757 // doing it in a single shuffle.
2758 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2759 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2760 // Concat the vectors.
2761 // NOTE: Operands have to be swapped to match intrinsic definition.
2762 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2763 Rep = Builder.CreateBitCast(Rep, CI->getType());
2764 } else if (Name == "avx512.kand.w") {
2765 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2766 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2767 Rep = Builder.CreateAnd(LHS, RHS);
2768 Rep = Builder.CreateBitCast(Rep, CI->getType());
2769 } else if (Name == "avx512.kandn.w") {
2770 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2771 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2772 LHS = Builder.CreateNot(LHS);
2773 Rep = Builder.CreateAnd(LHS, RHS);
2774 Rep = Builder.CreateBitCast(Rep, CI->getType());
2775 } else if (Name == "avx512.kor.w") {
2776 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2777 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2778 Rep = Builder.CreateOr(LHS, RHS);
2779 Rep = Builder.CreateBitCast(Rep, CI->getType());
2780 } else if (Name == "avx512.kxor.w") {
2781 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2782 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2783 Rep = Builder.CreateXor(LHS, RHS);
2784 Rep = Builder.CreateBitCast(Rep, CI->getType());
2785 } else if (Name == "avx512.kxnor.w") {
2786 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2787 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2788 LHS = Builder.CreateNot(LHS);
2789 Rep = Builder.CreateXor(LHS, RHS);
2790 Rep = Builder.CreateBitCast(Rep, CI->getType());
2791 } else if (Name == "avx512.knot.w") {
2792 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2793 Rep = Builder.CreateNot(Rep);
2794 Rep = Builder.CreateBitCast(Rep, CI->getType());
2795 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2796 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2797 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2798 Rep = Builder.CreateOr(LHS, RHS);
2799 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2800 Value *C;
2801 if (Name[14] == 'c')
2802 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2803 else
2804 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2805 Rep = Builder.CreateICmpEQ(Rep, C);
2806 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2807 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2808 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2809 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2810 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2811 Type *I32Ty = Type::getInt32Ty(C);
2812 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2813 ConstantInt::get(I32Ty, 0));
2814 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2815 ConstantInt::get(I32Ty, 0));
2816 Value *EltOp;
2817 if (Name.contains(".add."))
2818 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2819 else if (Name.contains(".sub."))
2820 EltOp = Builder.CreateFSub(Elt0, Elt1);
2821 else if (Name.contains(".mul."))
2822 EltOp = Builder.CreateFMul(Elt0, Elt1);
2823 else
2824 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2825 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2826 ConstantInt::get(I32Ty, 0));
2827 } else if (Name.starts_with("avx512.mask.pcmp")) {
2828 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2829 bool CmpEq = Name[16] == 'e';
2830 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2831 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2832 Type *OpTy = CI->getArgOperand(0)->getType();
2833 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2834 Intrinsic::ID IID;
2835 switch (VecWidth) {
2836 default:
2837 llvm_unreachable("Unexpected intrinsic");
2838 case 128:
2839 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2840 break;
2841 case 256:
2842 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2843 break;
2844 case 512:
2845 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2846 break;
2847 }
2848
2849 Rep =
2850 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2851 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2852 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2853 Type *OpTy = CI->getArgOperand(0)->getType();
2854 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2855 unsigned EltWidth = OpTy->getScalarSizeInBits();
2856 Intrinsic::ID IID;
2857 if (VecWidth == 128 && EltWidth == 32)
2858 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2859 else if (VecWidth == 256 && EltWidth == 32)
2860 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2861 else if (VecWidth == 512 && EltWidth == 32)
2862 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2863 else if (VecWidth == 128 && EltWidth == 64)
2864 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2865 else if (VecWidth == 256 && EltWidth == 64)
2866 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2867 else if (VecWidth == 512 && EltWidth == 64)
2868 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2869 else
2870 llvm_unreachable("Unexpected intrinsic");
2871
2872 Rep =
2873 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2874 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2875 } else if (Name.starts_with("avx512.cmp.p")) {
2876 SmallVector<Value *, 4> Args(CI->args());
2877 Type *OpTy = Args[0]->getType();
2878 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2879 unsigned EltWidth = OpTy->getScalarSizeInBits();
2880 Intrinsic::ID IID;
2881 if (VecWidth == 128 && EltWidth == 32)
2882 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2883 else if (VecWidth == 256 && EltWidth == 32)
2884 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2885 else if (VecWidth == 512 && EltWidth == 32)
2886 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2887 else if (VecWidth == 128 && EltWidth == 64)
2888 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2889 else if (VecWidth == 256 && EltWidth == 64)
2890 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2891 else if (VecWidth == 512 && EltWidth == 64)
2892 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2893 else
2894 llvm_unreachable("Unexpected intrinsic");
2895
2896 Value *Mask = Constant::getAllOnesValue(CI->getType());
2897 if (VecWidth == 512)
2898 std::swap(Mask, Args.back());
2899 Args.push_back(Mask);
2900
2901 Rep = Builder.CreateIntrinsic(IID, Args);
2902 } else if (Name.starts_with("avx512.mask.cmp.")) {
2903 // Integer compare intrinsics.
2904 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2905 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2906 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2907 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2908 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2909 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2910 Name.starts_with("avx512.cvtw2mask.") ||
2911 Name.starts_with("avx512.cvtd2mask.") ||
2912 Name.starts_with("avx512.cvtq2mask.")) {
2913 Value *Op = CI->getArgOperand(0);
2914 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2915 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2916 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2917 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2918 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2919 Name.starts_with("avx512.mask.pabs")) {
2920 Rep = upgradeAbs(Builder, *CI);
2921 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2922 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2923 Name.starts_with("avx512.mask.pmaxs")) {
2924 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2925 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2926 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2927 Name.starts_with("avx512.mask.pmaxu")) {
2928 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2929 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2930 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2931 Name.starts_with("avx512.mask.pmins")) {
2932 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2933 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2934 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2935 Name.starts_with("avx512.mask.pminu")) {
2936 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2937 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2938 Name == "avx512.pmulu.dq.512" ||
2939 Name.starts_with("avx512.mask.pmulu.dq.")) {
2940 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2941 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2942 Name == "avx512.pmul.dq.512" ||
2943 Name.starts_with("avx512.mask.pmul.dq.")) {
2944 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2945 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2946 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2947 Rep =
2948 Builder.CreateSIToFP(CI->getArgOperand(1),
2949 cast<VectorType>(CI->getType())->getElementType());
2950 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2951 } else if (Name == "avx512.cvtusi2sd") {
2952 Rep =
2953 Builder.CreateUIToFP(CI->getArgOperand(1),
2954 cast<VectorType>(CI->getType())->getElementType());
2955 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2956 } else if (Name == "sse2.cvtss2sd") {
2957 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2958 Rep = Builder.CreateFPExt(
2959 Rep, cast<VectorType>(CI->getType())->getElementType());
2960 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2961 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2962 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2963 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2964 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2965 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2966 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2967 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2968 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2969 Name == "avx512.mask.cvtqq2ps.256" ||
2970 Name == "avx512.mask.cvtqq2ps.512" ||
2971 Name == "avx512.mask.cvtuqq2ps.256" ||
2972 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2973 Name == "avx.cvt.ps2.pd.256" ||
2974 Name == "avx512.mask.cvtps2pd.128" ||
2975 Name == "avx512.mask.cvtps2pd.256") {
2976 auto *DstTy = cast<FixedVectorType>(CI->getType());
2977 Rep = CI->getArgOperand(0);
2978 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2979
2980 unsigned NumDstElts = DstTy->getNumElements();
2981 if (NumDstElts < SrcTy->getNumElements()) {
2982 assert(NumDstElts == 2 && "Unexpected vector size");
2983 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2984 }
2985
2986 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2987 bool IsUnsigned = Name.contains("cvtu");
2988 if (IsPS2PD)
2989 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2990 else if (CI->arg_size() == 4 &&
2991 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2992 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2993 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2994 : Intrinsic::x86_avx512_sitofp_round;
2995 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2996 {Rep, CI->getArgOperand(3)});
2997 } else {
2998 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2999 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3000 }
3001
3002 if (CI->arg_size() >= 3)
3003 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3004 CI->getArgOperand(1));
3005 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3006 Name.starts_with("vcvtph2ps.")) {
3007 auto *DstTy = cast<FixedVectorType>(CI->getType());
3008 Rep = CI->getArgOperand(0);
3009 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3010 unsigned NumDstElts = DstTy->getNumElements();
3011 if (NumDstElts != SrcTy->getNumElements()) {
3012 assert(NumDstElts == 4 && "Unexpected vector size");
3013 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3014 }
3015 Rep = Builder.CreateBitCast(
3016 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3017 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3018 if (CI->arg_size() >= 3)
3019 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3020 CI->getArgOperand(1));
3021 } else if (Name.starts_with("avx512.mask.load")) {
3022 // "avx512.mask.loadu." or "avx512.mask.load."
3023 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3024 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3025 CI->getArgOperand(2), Aligned);
3026 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3027 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3028 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3029 ResultTy->getNumElements());
3030
3031 Rep = Builder.CreateIntrinsic(
3032 Intrinsic::masked_expandload, ResultTy,
3033 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3034 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3035 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3036 Value *MaskVec =
3037 getX86MaskVec(Builder, CI->getArgOperand(2),
3038 cast<FixedVectorType>(ResultTy)->getNumElements());
3039
3040 Rep = Builder.CreateIntrinsic(
3041 Intrinsic::masked_compressstore, ResultTy,
3042 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
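// Illustrative result (value names are arbitrary): the old masked store
// becomes a target-independent compressing store, e.g.
//   call void @llvm.masked.compressstore.v8f32(<8 x float> %data, ptr %p,
//                                              <8 x i1> %mask)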
3043 } else if (Name.starts_with("avx512.mask.compress.") ||
3044 Name.starts_with("avx512.mask.expand.")) {
3045 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3046
3047 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3048 ResultTy->getNumElements());
3049
3050 bool IsCompress = Name[12] == 'c';
3051 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3052 : Intrinsic::x86_avx512_mask_expand;
3053 Rep = Builder.CreateIntrinsic(
3054 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3055 } else if (Name.starts_with("xop.vpcom")) {
3056 bool IsSigned;
3057 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3058 Name.ends_with("uq"))
3059 IsSigned = false;
3060 else if (Name.ends_with("b") || Name.ends_with("w") ||
3061 Name.ends_with("d") || Name.ends_with("q"))
3062 IsSigned = true;
3063 else
3064 llvm_unreachable("Unknown suffix");
3065
3066 unsigned Imm;
3067 if (CI->arg_size() == 3) {
3068 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3069 } else {
3070 Name = Name.substr(9); // strip off "xop.vpcom"
3071 if (Name.starts_with("lt"))
3072 Imm = 0;
3073 else if (Name.starts_with("le"))
3074 Imm = 1;
3075 else if (Name.starts_with("gt"))
3076 Imm = 2;
3077 else if (Name.starts_with("ge"))
3078 Imm = 3;
3079 else if (Name.starts_with("eq"))
3080 Imm = 4;
3081 else if (Name.starts_with("ne"))
3082 Imm = 5;
3083 else if (Name.starts_with("false"))
3084 Imm = 6;
3085 else if (Name.starts_with("true"))
3086 Imm = 7;
3087 else
3088 llvm_unreachable("Unknown condition");
3089 }
3090
3091 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3092 } else if (Name.starts_with("xop.vpcmov")) {
3093 Value *Sel = CI->getArgOperand(2);
3094 Value *NotSel = Builder.CreateNot(Sel);
3095 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3096 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3097 Rep = Builder.CreateOr(Sel0, Sel1);
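// xop.vpcmov is a bitwise select: (A & Sel) | (B & ~Sel). Illustrative IR
// (value names are arbitrary):
//   %not = xor <2 x i64> %sel, <i64 -1, i64 -1>
//   %s0  = and <2 x i64> %a, %sel
//   %s1  = and <2 x i64> %b, %not
//   %r   = or <2 x i64> %s0, %s1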
3098 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3099 Name.starts_with("avx512.mask.prol")) {
3100 Rep = upgradeX86Rotate(Builder, *CI, false);
3101 } else if (Name.starts_with("avx512.pror") ||
3102 Name.starts_with("avx512.mask.pror")) {
3103 Rep = upgradeX86Rotate(Builder, *CI, true);
3104 } else if (Name.starts_with("avx512.vpshld.") ||
3105 Name.starts_with("avx512.mask.vpshld") ||
3106 Name.starts_with("avx512.maskz.vpshld")) {
3107 bool ZeroMask = Name[11] == 'z';
3108 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3109 } else if (Name.starts_with("avx512.vpshrd.") ||
3110 Name.starts_with("avx512.mask.vpshrd") ||
3111 Name.starts_with("avx512.maskz.vpshrd")) {
3112 bool ZeroMask = Name[11] == 'z';
3113 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3114 } else if (Name == "sse42.crc32.64.8") {
3115 Value *Trunc0 =
3116 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3117 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3118 {Trunc0, CI->getArgOperand(1)});
3119 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3120 } else if (Name.starts_with("avx.vbroadcast.s") ||
3121 Name.starts_with("avx512.vbroadcast.s")) {
3122 // Replace broadcasts with a series of insertelements.
3123 auto *VecTy = cast<FixedVectorType>(CI->getType());
3124 Type *EltTy = VecTy->getElementType();
3125 unsigned EltNum = VecTy->getNumElements();
3126 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3127 Type *I32Ty = Type::getInt32Ty(C);
3128 Rep = PoisonValue::get(VecTy);
3129 for (unsigned I = 0; I < EltNum; ++I)
3130 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
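// Illustrative expansion for a 4-element broadcast (value names are
// arbitrary):
//   %ld = load float, ptr %p
//   %v0 = insertelement <4 x float> poison, float %ld, i32 0
//   ...
//   %v3 = insertelement <4 x float> %v2, float %ld, i32 3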
3131 } else if (Name.starts_with("sse41.pmovsx") ||
3132 Name.starts_with("sse41.pmovzx") ||
3133 Name.starts_with("avx2.pmovsx") ||
3134 Name.starts_with("avx2.pmovzx") ||
3135 Name.starts_with("avx512.mask.pmovsx") ||
3136 Name.starts_with("avx512.mask.pmovzx")) {
3137 auto *DstTy = cast<FixedVectorType>(CI->getType());
3138 unsigned NumDstElts = DstTy->getNumElements();
3139
3140 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3141 SmallVector<int, 8> ShuffleMask(NumDstElts);
3142 for (unsigned i = 0; i != NumDstElts; ++i)
3143 ShuffleMask[i] = i;
3144
3145 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3146
3147 bool DoSext = Name.contains("pmovsx");
3148 Rep =
3149 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3150 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3151 if (CI->arg_size() == 3)
3152 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3153 CI->getArgOperand(1));
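// E.g. (illustrative IR; value names are arbitrary) "sse41.pmovsxbd":
//   %sv = shufflevector <16 x i8> %x, <16 x i8> poison,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r  = sext <4 x i8> %sv to <4 x i32>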
3154 } else if (Name == "avx512.mask.pmov.qd.256" ||
3155 Name == "avx512.mask.pmov.qd.512" ||
3156 Name == "avx512.mask.pmov.wb.256" ||
3157 Name == "avx512.mask.pmov.wb.512") {
3158 Type *Ty = CI->getArgOperand(1)->getType();
3159 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3160 Rep =
3161 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3162 } else if (Name.starts_with("avx.vbroadcastf128") ||
3163 Name == "avx2.vbroadcasti128") {
3164 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3165 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3166 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3167 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3168 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3169 if (NumSrcElts == 2)
3170 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3171 else
3172 Rep = Builder.CreateShuffleVector(Load,
3173 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
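// E.g. (illustrative IR; value names are arbitrary) a 256-bit float
// broadcast of a 128-bit memory operand:
//   %ld = load <4 x float>, ptr %p, align 1
//   %r  = shufflevector <4 x float> %ld, <4 x float> poison,
//                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                  i32 0, i32 1, i32 2, i32 3>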
3174 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3175 Name.starts_with("avx512.mask.shuf.f")) {
3176 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3177 Type *VT = CI->getType();
3178 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3179 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3180 unsigned ControlBitsMask = NumLanes - 1;
3181 unsigned NumControlBits = NumLanes / 2;
3182 SmallVector<int, 8> ShuffleMask;
3183
3184 for (unsigned l = 0; l != NumLanes; ++l) {
3185 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3186 // The upper half of the destination reads from the second source.
3187 if (l >= NumLanes / 2)
3188 LaneMask += NumLanes;
3189 for (unsigned i = 0; i != NumElementsInLane; ++i)
3190 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3191 }
3192 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3193 CI->getArgOperand(1), ShuffleMask);
3194 Rep =
3195 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3196 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3197 Name.starts_with("avx512.mask.broadcasti")) {
3198 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3199 ->getNumElements();
3200 unsigned NumDstElts =
3201 cast<FixedVectorType>(CI->getType())->getNumElements();
3202
3203 SmallVector<int, 8> ShuffleMask(NumDstElts);
3204 for (unsigned i = 0; i != NumDstElts; ++i)
3205 ShuffleMask[i] = i % NumSrcElts;
3206
3207 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3208 CI->getArgOperand(0), ShuffleMask);
3209 Rep =
3210 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3211 } else if (Name.starts_with("avx2.pbroadcast") ||
3212 Name.starts_with("avx2.vbroadcast") ||
3213 Name.starts_with("avx512.pbroadcast") ||
3214 Name.starts_with("avx512.mask.broadcast.s")) {
3215 // Replace vp?broadcasts with a vector shuffle.
3216 Value *Op = CI->getArgOperand(0);
3217 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3218 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3219 SmallVector<int, 8> M;
3220 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3221 Rep = Builder.CreateShuffleVector(Op, M);
3222
3223 if (CI->arg_size() == 3)
3224 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3225 CI->getArgOperand(1));
3226 } else if (Name.starts_with("sse2.padds.") ||
3227 Name.starts_with("avx2.padds.") ||
3228 Name.starts_with("avx512.padds.") ||
3229 Name.starts_with("avx512.mask.padds.")) {
3230 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3231 } else if (Name.starts_with("sse2.psubs.") ||
3232 Name.starts_with("avx2.psubs.") ||
3233 Name.starts_with("avx512.psubs.") ||
3234 Name.starts_with("avx512.mask.psubs.")) {
3235 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3236 } else if (Name.starts_with("sse2.paddus.") ||
3237 Name.starts_with("avx2.paddus.") ||
3238 Name.starts_with("avx512.mask.paddus.")) {
3239 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3240 } else if (Name.starts_with("sse2.psubus.") ||
3241 Name.starts_with("avx2.psubus.") ||
3242 Name.starts_with("avx512.mask.psubus.")) {
3243 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3244 } else if (Name.starts_with("avx512.mask.palignr.")) {
3245 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3246 CI->getArgOperand(1), CI->getArgOperand(2),
3247 CI->getArgOperand(3), CI->getArgOperand(4),
3248 false);
3249 } else if (Name.starts_with("avx512.mask.valign.")) {
3250 Rep = upgradeX86ALIGNIntrinsics(
3251 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3252 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3253 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3254 // 128/256-bit shift left specified in bits.
3255 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3256 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3257 Shift / 8); // Shift is in bits.
3258 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3259 // 128/256-bit shift right specified in bits.
3260 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3261 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3262 Shift / 8); // Shift is in bits.
3263 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3264 Name == "avx512.psll.dq.512") {
3265 // 128/256/512-bit shift left specified in bytes.
3266 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3267 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3268 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3269 Name == "avx512.psrl.dq.512") {
3270 // 128/256/512-bit shift right specified in bytes.
3271 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3272 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3273 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3274 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3275 Name.starts_with("avx2.pblendd.")) {
3276 Value *Op0 = CI->getArgOperand(0);
3277 Value *Op1 = CI->getArgOperand(1);
3278 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3279 auto *VecTy = cast<FixedVectorType>(CI->getType());
3280 unsigned NumElts = VecTy->getNumElements();
3281
3282 SmallVector<int, 16> Idxs(NumElts);
3283 for (unsigned i = 0; i != NumElts; ++i)
3284 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3285
3286 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
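// Worked example: "sse41.pblendw" with Imm = 0x0F on <8 x i16> selects
// elements 0-3 from Op1 and keeps 4-7 from Op0, i.e. shuffle mask
// <8, 9, 10, 11, 4, 5, 6, 7>.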
3287 } else if (Name.starts_with("avx.vinsertf128.") ||
3288 Name == "avx2.vinserti128" ||
3289 Name.starts_with("avx512.mask.insert")) {
3290 Value *Op0 = CI->getArgOperand(0);
3291 Value *Op1 = CI->getArgOperand(1);
3292 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3293 unsigned DstNumElts =
3294 cast<FixedVectorType>(CI->getType())->getNumElements();
3295 unsigned SrcNumElts =
3296 cast<FixedVectorType>(Op1->getType())->getNumElements();
3297 unsigned Scale = DstNumElts / SrcNumElts;
3298
3299 // Mask off the high bits of the immediate value; hardware ignores those.
3300 Imm = Imm % Scale;
3301
3302 // Extend the second operand into a vector the size of the destination.
3303 SmallVector<int, 8> Idxs(DstNumElts);
3304 for (unsigned i = 0; i != SrcNumElts; ++i)
3305 Idxs[i] = i;
3306 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3307 Idxs[i] = SrcNumElts;
3308 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3309
3310 // Insert the second operand into the first operand.
3311
3312 // Note that there is no guarantee that instruction lowering will actually
3313 // produce a vinsertf128 instruction for the created shuffles. In
3314 // particular, the 0 immediate case involves no lane changes, so it can
3315 // be handled as a blend.
3316
3317 // Example of shuffle mask for 32-bit elements:
3318 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3319 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3320
3321 // First fill with an identity mask.
3322 for (unsigned i = 0; i != DstNumElts; ++i)
3323 Idxs[i] = i;
3324 // Then replace the elements where we need to insert.
3325 for (unsigned i = 0; i != SrcNumElts; ++i)
3326 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3327 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3328
3329 // If the intrinsic has a mask operand, handle that.
3330 if (CI->arg_size() == 5)
3331 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3332 CI->getArgOperand(3));
3333 } else if (Name.starts_with("avx.vextractf128.") ||
3334 Name == "avx2.vextracti128" ||
3335 Name.starts_with("avx512.mask.vextract")) {
3336 Value *Op0 = CI->getArgOperand(0);
3337 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3338 unsigned DstNumElts =
3339 cast<FixedVectorType>(CI->getType())->getNumElements();
3340 unsigned SrcNumElts =
3341 cast<FixedVectorType>(Op0->getType())->getNumElements();
3342 unsigned Scale = SrcNumElts / DstNumElts;
3343
3344 // Mask off the high bits of the immediate value; hardware ignores those.
3345 Imm = Imm % Scale;
3346
3347 // Get indexes for the subvector of the input vector.
3348 SmallVector<int, 8> Idxs(DstNumElts);
3349 for (unsigned i = 0; i != DstNumElts; ++i) {
3350 Idxs[i] = i + (Imm * DstNumElts);
3351 }
3352 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
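// E.g. extracting the upper half (Imm = 1) of an <8 x float> source uses
// Idxs = <4, 5, 6, 7> (illustrative IR; value names are arbitrary):
//   %r = shufflevector <8 x float> %src, <8 x float> %src,
//                      <4 x i32> <i32 4, i32 5, i32 6, i32 7>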
3353
3354 // If the intrinsic has a mask operand, handle that.
3355 if (CI->arg_size() == 4)
3356 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3357 CI->getArgOperand(2));
3358 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3359 Name.starts_with("avx512.mask.perm.di.")) {
3360 Value *Op0 = CI->getArgOperand(0);
3361 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3362 auto *VecTy = cast<FixedVectorType>(CI->getType());
3363 unsigned NumElts = VecTy->getNumElements();
3364
3365 SmallVector<int, 8> Idxs(NumElts);
3366 for (unsigned i = 0; i != NumElts; ++i)
3367 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3368
3369 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3370
3371 if (CI->arg_size() == 4)
3372 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3373 CI->getArgOperand(2));
3374 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3375 // The immediate permute control byte looks like this:
3376 // [1:0] - select 128 bits from sources for low half of destination
3377 // [2] - ignore
3378 // [3] - zero low half of destination
3379 // [5:4] - select 128 bits from sources for high half of destination
3380 // [6] - ignore
3381 // [7] - zero high half of destination
3382
3383 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3384
3385 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3386 unsigned HalfSize = NumElts / 2;
3387 SmallVector<int, 8> ShuffleMask(NumElts);
3388
3389 // Determine which operand(s) are actually in use for this instruction.
3390 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3391 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3392
3393 // If needed, replace operands based on zero mask.
3394 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3395 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3396
3397 // Permute low half of result.
3398 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3399 for (unsigned i = 0; i < HalfSize; ++i)
3400 ShuffleMask[i] = StartIndex + i;
3401
3402 // Permute high half of result.
3403 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3404 for (unsigned i = 0; i < HalfSize; ++i)
3405 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3406
3407 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
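// Worked example: for Imm = 0x31, bits [1:0] = 1 pick the high half of
// operand 0 for the low result half, and bits [5:4] = 3 pick the high half
// of operand 1 for the high result half; for <8 x float> the shuffle mask
// is <4, 5, 6, 7, 12, 13, 14, 15>.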
3408
3409 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3410 Name.starts_with("avx512.mask.vpermil.p") ||
3411 Name.starts_with("avx512.mask.pshuf.d.")) {
3412 Value *Op0 = CI->getArgOperand(0);
3413 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3414 auto *VecTy = cast<FixedVectorType>(CI->getType());
3415 unsigned NumElts = VecTy->getNumElements();
3416 // Calculate the size of each index in the immediate.
3417 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3418 unsigned IdxMask = ((1 << IdxSize) - 1);
3419
3420 SmallVector<int, 8> Idxs(NumElts);
3421 // Look up the bits for this element, wrapping around the immediate every
3422 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3423 // to offset by the first index of each group.
3424 for (unsigned i = 0; i != NumElts; ++i)
3425 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3426
3427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3428
3429 if (CI->arg_size() == 4)
3430 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3431 CI->getArgOperand(2));
3432 } else if (Name == "sse2.pshufl.w" ||
3433 Name.starts_with("avx512.mask.pshufl.w.")) {
3434 Value *Op0 = CI->getArgOperand(0);
3435 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3436 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3437
3438 SmallVector<int, 16> Idxs(NumElts);
3439 for (unsigned l = 0; l != NumElts; l += 8) {
3440 for (unsigned i = 0; i != 4; ++i)
3441 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3442 for (unsigned i = 4; i != 8; ++i)
3443 Idxs[i + l] = i + l;
3444 }
3445
3446 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
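// Worked example: Imm = 0x1B reverses the low four words of each 128-bit
// lane while leaving the high four untouched; for <8 x i16> the mask is
// <3, 2, 1, 0, 4, 5, 6, 7>.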
3447
3448 if (CI->arg_size() == 4)
3449 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3450 CI->getArgOperand(2));
3451 } else if (Name == "sse2.pshufh.w" ||
3452 Name.starts_with("avx512.mask.pshufh.w.")) {
3453 Value *Op0 = CI->getArgOperand(0);
3454 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3455 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3456
3457 SmallVector<int, 16> Idxs(NumElts);
3458 for (unsigned l = 0; l != NumElts; l += 8) {
3459 for (unsigned i = 0; i != 4; ++i)
3460 Idxs[i + l] = i + l;
3461 for (unsigned i = 0; i != 4; ++i)
3462 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3463 }
3464
3465 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3466
3467 if (CI->arg_size() == 4)
3468 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3469 CI->getArgOperand(2));
3470 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3471 Value *Op0 = CI->getArgOperand(0);
3472 Value *Op1 = CI->getArgOperand(1);
3473 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3474 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3475
3476 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3477 unsigned HalfLaneElts = NumLaneElts / 2;
3478
3479 SmallVector<int, 16> Idxs(NumElts);
3480 for (unsigned i = 0; i != NumElts; ++i) {
3481 // Base index is the starting element of the lane.
3482 Idxs[i] = i - (i % NumLaneElts);
3483 // If we are halfway through the lane, switch to the other source.
3484 if ((i % NumLaneElts) >= HalfLaneElts)
3485 Idxs[i] += NumElts;
3486 // Now select the specific element by adding HalfLaneElts bits from
3487 // the immediate, wrapping around the immediate every 8 bits.
3488 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3489 }
3490
3491 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3492
3493 Rep =
3494 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3495 } else if (Name.starts_with("avx512.mask.movddup") ||
3496 Name.starts_with("avx512.mask.movshdup") ||
3497 Name.starts_with("avx512.mask.movsldup")) {
3498 Value *Op0 = CI->getArgOperand(0);
3499 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3500 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3501
3502 unsigned Offset = 0;
3503 if (Name.starts_with("avx512.mask.movshdup."))
3504 Offset = 1;
3505
3506 SmallVector<int, 16> Idxs(NumElts);
3507 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3508 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3509 Idxs[i + l + 0] = i + l + Offset;
3510 Idxs[i + l + 1] = i + l + Offset;
3511 }
3512
3513 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
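// E.g. "avx512.mask.movshdup" (Offset = 1) on <4 x float> duplicates the
// odd lanes (mask <1, 1, 3, 3>), while movsldup (Offset = 0) duplicates the
// even lanes (mask <0, 0, 2, 2>).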
3514
3515 Rep =
3516 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3517 } else if (Name.starts_with("avx512.mask.punpckl") ||
3518 Name.starts_with("avx512.mask.unpckl.")) {
3519 Value *Op0 = CI->getArgOperand(0);
3520 Value *Op1 = CI->getArgOperand(1);
3521 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3522 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3523
3524 SmallVector<int, 64> Idxs(NumElts);
3525 for (int l = 0; l != NumElts; l += NumLaneElts)
3526 for (int i = 0; i != NumLaneElts; ++i)
3527 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3528
3529 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
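// E.g. for <4 x i32> (one 128-bit lane, NumLaneElts = 4) this interleaves
// the low halves of the two sources: shuffle mask <0, 4, 1, 5>.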
3530
3531 Rep =
3532 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3533 } else if (Name.starts_with("avx512.mask.punpckh") ||
3534 Name.starts_with("avx512.mask.unpckh.")) {
3535 Value *Op0 = CI->getArgOperand(0);
3536 Value *Op1 = CI->getArgOperand(1);
3537 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3538 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3539
3540 SmallVector<int, 64> Idxs(NumElts);
3541 for (int l = 0; l != NumElts; l += NumLaneElts)
3542 for (int i = 0; i != NumLaneElts; ++i)
3543 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3544
3545 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3546
3547 Rep =
3548 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3549 } else if (Name.starts_with("avx512.mask.and.") ||
3550 Name.starts_with("avx512.mask.pand.")) {
3551 VectorType *FTy = cast<VectorType>(CI->getType());
3552 VectorType *ITy = VectorType::getInteger(FTy);
3553 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3554 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3555 Rep = Builder.CreateBitCast(Rep, FTy);
3556 Rep =
3557 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3558 } else if (Name.starts_with("avx512.mask.andn.") ||
3559 Name.starts_with("avx512.mask.pandn.")) {
3560 VectorType *FTy = cast<VectorType>(CI->getType());
3561 VectorType *ITy = VectorType::getInteger(FTy);
3562 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3563 Rep = Builder.CreateAnd(Rep,
3564 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3565 Rep = Builder.CreateBitCast(Rep, FTy);
3566 Rep =
3567 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3568 } else if (Name.starts_with("avx512.mask.or.") ||
3569 Name.starts_with("avx512.mask.por.")) {
3570 VectorType *FTy = cast<VectorType>(CI->getType());
3571 VectorType *ITy = VectorType::getInteger(FTy);
3572 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3573 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3574 Rep = Builder.CreateBitCast(Rep, FTy);
3575 Rep =
3576 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3577 } else if (Name.starts_with("avx512.mask.xor.") ||
3578 Name.starts_with("avx512.mask.pxor.")) {
3579 VectorType *FTy = cast<VectorType>(CI->getType());
3580 VectorType *ITy = VectorType::getInteger(FTy);
3581 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3582 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3583 Rep = Builder.CreateBitCast(Rep, FTy);
3584 Rep =
3585 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3586 } else if (Name.starts_with("avx512.mask.padd.")) {
3587 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3588 Rep =
3589 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3590 } else if (Name.starts_with("avx512.mask.psub.")) {
3591 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3592 Rep =
3593 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3594 } else if (Name.starts_with("avx512.mask.pmull.")) {
3595 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3596 Rep =
3597 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3598 } else if (Name.starts_with("avx512.mask.add.p")) {
3599 if (Name.ends_with(".512")) {
3600 Intrinsic::ID IID;
3601 if (Name[17] == 's')
3602 IID = Intrinsic::x86_avx512_add_ps_512;
3603 else
3604 IID = Intrinsic::x86_avx512_add_pd_512;
3605
3606 Rep = Builder.CreateIntrinsic(
3607 IID,
3608 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3609 } else {
3610 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3611 }
3612 Rep =
3613 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3614 } else if (Name.starts_with("avx512.mask.div.p")) {
3615 if (Name.ends_with(".512")) {
3616 Intrinsic::ID IID;
3617 if (Name[17] == 's')
3618 IID = Intrinsic::x86_avx512_div_ps_512;
3619 else
3620 IID = Intrinsic::x86_avx512_div_pd_512;
3621
3622 Rep = Builder.CreateIntrinsic(
3623 IID,
3624 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3625 } else {
3626 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3627 }
3628 Rep =
3629 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3630 } else if (Name.starts_with("avx512.mask.mul.p")) {
3631 if (Name.ends_with(".512")) {
3632 Intrinsic::ID IID;
3633 if (Name[17] == 's')
3634 IID = Intrinsic::x86_avx512_mul_ps_512;
3635 else
3636 IID = Intrinsic::x86_avx512_mul_pd_512;
3637
3638 Rep = Builder.CreateIntrinsic(
3639 IID,
3640 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3641 } else {
3642 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3643 }
3644 Rep =
3645 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3646 } else if (Name.starts_with("avx512.mask.sub.p")) {
3647 if (Name.ends_with(".512")) {
3648 Intrinsic::ID IID;
3649 if (Name[17] == 's')
3650 IID = Intrinsic::x86_avx512_sub_ps_512;
3651 else
3652 IID = Intrinsic::x86_avx512_sub_pd_512;
3653
3654 Rep = Builder.CreateIntrinsic(
3655 IID,
3656 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3657 } else {
3658 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3659 }
3660 Rep =
3661 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3662 } else if ((Name.starts_with("avx512.mask.max.p") ||
3663 Name.starts_with("avx512.mask.min.p")) &&
3664 Name.drop_front(18) == ".512") {
3665 bool IsDouble = Name[17] == 'd';
3666 bool IsMin = Name[13] == 'i';
3667 static const Intrinsic::ID MinMaxTbl[2][2] = {
3668 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3669 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3670 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3671
3672 Rep = Builder.CreateIntrinsic(
3673 IID,
3674 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3675 Rep =
3676 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3677 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3678 Rep =
3679 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3680 {CI->getArgOperand(0), Builder.getInt1(false)});
3681 Rep =
3682 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3683 } else if (Name.starts_with("avx512.mask.psll")) {
3684 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3685 bool IsVariable = Name[16] == 'v';
3686 char Size = Name[16] == '.' ? Name[17]
3687 : Name[17] == '.' ? Name[18]
3688 : Name[18] == '.' ? Name[19]
3689 : Name[20];
3690
3691 Intrinsic::ID IID;
3692 if (IsVariable && Name[17] != '.') {
3693 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3694 IID = Intrinsic::x86_avx2_psllv_q;
3695 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3696 IID = Intrinsic::x86_avx2_psllv_q_256;
3697 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3698 IID = Intrinsic::x86_avx2_psllv_d;
3699 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3700 IID = Intrinsic::x86_avx2_psllv_d_256;
3701 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3702 IID = Intrinsic::x86_avx512_psllv_w_128;
3703 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3704 IID = Intrinsic::x86_avx512_psllv_w_256;
3705 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3706 IID = Intrinsic::x86_avx512_psllv_w_512;
3707 else
3708 llvm_unreachable("Unexpected size");
3709 } else if (Name.ends_with(".128")) {
3710 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3711 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3712 : Intrinsic::x86_sse2_psll_d;
3713 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3714 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3715 : Intrinsic::x86_sse2_psll_q;
3716 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3717 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3718 : Intrinsic::x86_sse2_psll_w;
3719 else
3720 llvm_unreachable("Unexpected size");
3721 } else if (Name.ends_with(".256")) {
3722 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3723 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3724 : Intrinsic::x86_avx2_psll_d;
3725 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3726 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3727 : Intrinsic::x86_avx2_psll_q;
3728 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3729 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3730 : Intrinsic::x86_avx2_psll_w;
3731 else
3732 llvm_unreachable("Unexpected size");
3733 } else {
3734 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3735 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3736 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3737 : Intrinsic::x86_avx512_psll_d_512;
3738 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3739 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3740 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3741 : Intrinsic::x86_avx512_psll_q_512;
3742 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3743 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3744 : Intrinsic::x86_avx512_psll_w_512;
3745 else
3746 llvm_unreachable("Unexpected size");
3747 }
3748
3749 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3750 } else if (Name.starts_with("avx512.mask.psrl")) {
3751 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3752 bool IsVariable = Name[16] == 'v';
3753 char Size = Name[16] == '.' ? Name[17]
3754 : Name[17] == '.' ? Name[18]
3755 : Name[18] == '.' ? Name[19]
3756 : Name[20];
3757
3758 Intrinsic::ID IID;
3759 if (IsVariable && Name[17] != '.') {
3760 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3761 IID = Intrinsic::x86_avx2_psrlv_q;
3762 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3763 IID = Intrinsic::x86_avx2_psrlv_q_256;
3764 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3765 IID = Intrinsic::x86_avx2_psrlv_d;
3766 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3767 IID = Intrinsic::x86_avx2_psrlv_d_256;
3768 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3769 IID = Intrinsic::x86_avx512_psrlv_w_128;
3770 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3771 IID = Intrinsic::x86_avx512_psrlv_w_256;
3772 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3773 IID = Intrinsic::x86_avx512_psrlv_w_512;
3774 else
3775 llvm_unreachable("Unexpected size");
3776 } else if (Name.ends_with(".128")) {
3777 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3778 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3779 : Intrinsic::x86_sse2_psrl_d;
3780 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3781 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3782 : Intrinsic::x86_sse2_psrl_q;
3783 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3784 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3785 : Intrinsic::x86_sse2_psrl_w;
3786 else
3787 llvm_unreachable("Unexpected size");
3788 } else if (Name.ends_with(".256")) {
3789 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3790 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3791 : Intrinsic::x86_avx2_psrl_d;
3792 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3793 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3794 : Intrinsic::x86_avx2_psrl_q;
3795 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3796 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3797 : Intrinsic::x86_avx2_psrl_w;
3798 else
3799 llvm_unreachable("Unexpected size");
3800 } else {
3801 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3802 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3803 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3804 : Intrinsic::x86_avx512_psrl_d_512;
3805 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3806 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3807 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3808 : Intrinsic::x86_avx512_psrl_q_512;
3809 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3810 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3811 : Intrinsic::x86_avx512_psrl_w_512;
3812 else
3813 llvm_unreachable("Unexpected size");
3814 }
3815
3816 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3817 } else if (Name.starts_with("avx512.mask.psra")) {
3818 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3819 bool IsVariable = Name[16] == 'v';
3820 char Size = Name[16] == '.' ? Name[17]
3821 : Name[17] == '.' ? Name[18]
3822 : Name[18] == '.' ? Name[19]
3823 : Name[20];
3824
3825 Intrinsic::ID IID;
3826 if (IsVariable && Name[17] != '.') {
3827 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3828 IID = Intrinsic::x86_avx2_psrav_d;
3829 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3830 IID = Intrinsic::x86_avx2_psrav_d_256;
3831 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3832 IID = Intrinsic::x86_avx512_psrav_w_128;
3833 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3834 IID = Intrinsic::x86_avx512_psrav_w_256;
3835 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3836 IID = Intrinsic::x86_avx512_psrav_w_512;
3837 else
3838 llvm_unreachable("Unexpected size");
3839 } else if (Name.ends_with(".128")) {
3840 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3841 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3842 : Intrinsic::x86_sse2_psra_d;
3843 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3844 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3845 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3846 : Intrinsic::x86_avx512_psra_q_128;
3847 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3848 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3849 : Intrinsic::x86_sse2_psra_w;
3850 else
3851 llvm_unreachable("Unexpected size");
3852 } else if (Name.ends_with(".256")) {
3853 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3854 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3855 : Intrinsic::x86_avx2_psra_d;
3856 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3857 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3858 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3859 : Intrinsic::x86_avx512_psra_q_256;
3860 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3861 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3862 : Intrinsic::x86_avx2_psra_w;
3863 else
3864 llvm_unreachable("Unexpected size");
3865 } else {
3866 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3867 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3868 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3869 : Intrinsic::x86_avx512_psra_d_512;
3870 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3871 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3872 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3873 : Intrinsic::x86_avx512_psra_q_512;
3874 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3875 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3876 : Intrinsic::x86_avx512_psra_w_512;
3877 else
3878 llvm_unreachable("Unexpected size");
3879 }
3880
3881 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3882 } else if (Name.starts_with("avx512.mask.move.s")) {
3883 Rep = upgradeMaskedMove(Builder, *CI);
3884 } else if (Name.starts_with("avx512.cvtmask2")) {
3885 Rep = upgradeMaskToInt(Builder, *CI);
3886 } else if (Name.ends_with(".movntdqa")) {
3887 MDNode *Node = MDNode::get(
3888 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3889
3890 LoadInst *LI = Builder.CreateAlignedLoad(
3891 CI->getType(), CI->getArgOperand(0),
3892 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3893 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3894 Rep = LI;
3895 } else if (Name.starts_with("fma.vfmadd.") ||
3896 Name.starts_with("fma.vfmsub.") ||
3897 Name.starts_with("fma.vfnmadd.") ||
3898 Name.starts_with("fma.vfnmsub.")) {
3899 bool NegMul = Name[6] == 'n';
3900 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3901 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3902
3903 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3904 CI->getArgOperand(2)};
3905
3906 if (IsScalar) {
3907 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3908 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3909 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3910 }
3911
3912 if (NegMul && !IsScalar)
3913 Ops[0] = Builder.CreateFNeg(Ops[0]);
3914 if (NegMul && IsScalar)
3915 Ops[1] = Builder.CreateFNeg(Ops[1]);
3916 if (NegAcc)
3917 Ops[2] = Builder.CreateFNeg(Ops[2]);
3918
3919 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3920
3921 if (IsScalar)
3922 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3923 } else if (Name.starts_with("fma4.vfmadd.s")) {
3924 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3925 CI->getArgOperand(2)};
3926
3927 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3928 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3929 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3930
3931 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3932
3933 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3934 Rep, (uint64_t)0);
3935 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3936 Name.starts_with("avx512.maskz.vfmadd.s") ||
3937 Name.starts_with("avx512.mask3.vfmadd.s") ||
3938 Name.starts_with("avx512.mask3.vfmsub.s") ||
3939 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3940 bool IsMask3 = Name[11] == '3';
3941 bool IsMaskZ = Name[11] == 'z';
3942 // Drop the "avx512.mask." prefix to simplify the indexing below.
3943 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3944 bool NegMul = Name[2] == 'n';
3945 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3946
3947 Value *A = CI->getArgOperand(0);
3948 Value *B = CI->getArgOperand(1);
3949 Value *C = CI->getArgOperand(2);
3950
3951 if (NegMul && (IsMask3 || IsMaskZ))
3952 A = Builder.CreateFNeg(A);
3953 if (NegMul && !(IsMask3 || IsMaskZ))
3954 B = Builder.CreateFNeg(B);
3955 if (NegAcc)
3956 C = Builder.CreateFNeg(C);
3957
3958 A = Builder.CreateExtractElement(A, (uint64_t)0);
3959 B = Builder.CreateExtractElement(B, (uint64_t)0);
3960 C = Builder.CreateExtractElement(C, (uint64_t)0);
3961
3962 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3963 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3964 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3965
3966 Intrinsic::ID IID;
3967 if (Name.back() == 'd')
3968 IID = Intrinsic::x86_avx512_vfmadd_f64;
3969 else
3970 IID = Intrinsic::x86_avx512_vfmadd_f32;
3971 Rep = Builder.CreateIntrinsic(IID, Ops);
3972 } else {
3973 Rep = Builder.CreateFMA(A, B, C);
3974 }
3975
3976 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3977 : IsMask3 ? C
3978 : A;
3979
3980 // For Mask3 with NegAcc, we need to create a new extractelement that
3981 // avoids the negation above.
3982 if (NegAcc && IsMask3)
3983 PassThru =
3984 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3985
3986 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3987 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3988 (uint64_t)0);
3989 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3990 Name.starts_with("avx512.mask.vfnmadd.p") ||
3991 Name.starts_with("avx512.mask.vfnmsub.p") ||
3992 Name.starts_with("avx512.mask3.vfmadd.p") ||
3993 Name.starts_with("avx512.mask3.vfmsub.p") ||
3994 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3995 Name.starts_with("avx512.maskz.vfmadd.p")) {
3996 bool IsMask3 = Name[11] == '3';
3997 bool IsMaskZ = Name[11] == 'z';
3998 // Drop the "avx512.mask." prefix to simplify the indexing below.
3999 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4000 bool NegMul = Name[2] == 'n';
4001 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4002
4003 Value *A = CI->getArgOperand(0);
4004 Value *B = CI->getArgOperand(1);
4005 Value *C = CI->getArgOperand(2);
4006
4007 if (NegMul && (IsMask3 || IsMaskZ))
4008 A = Builder.CreateFNeg(A);
4009 if (NegMul && !(IsMask3 || IsMaskZ))
4010 B = Builder.CreateFNeg(B);
4011 if (NegAcc)
4012 C = Builder.CreateFNeg(C);
4013
4014 if (CI->arg_size() == 5 &&
4015 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4016 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4017 Intrinsic::ID IID;
4018 // Check the character before ".512" in string.
4019 if (Name[Name.size() - 5] == 's')
4020 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4021 else
4022 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4023
4024 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4025 } else {
4026 Rep = Builder.CreateFMA(A, B, C);
4027 }
4028
4029 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4030 : IsMask3 ? CI->getArgOperand(2)
4031 : CI->getArgOperand(0);
4032
4033 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4034 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4035 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4036 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4037 Intrinsic::ID IID;
4038 if (VecWidth == 128 && EltWidth == 32)
4039 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4040 else if (VecWidth == 256 && EltWidth == 32)
4041 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4042 else if (VecWidth == 128 && EltWidth == 64)
4043 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4044 else if (VecWidth == 256 && EltWidth == 64)
4045 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4046 else
4047 llvm_unreachable("Unexpected intrinsic");
4048
4049 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4050 CI->getArgOperand(2)};
4051 Ops[2] = Builder.CreateFNeg(Ops[2]);
4052 Rep = Builder.CreateIntrinsic(IID, Ops);
4053 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4054 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4055 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4056 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4057 bool IsMask3 = Name[11] == '3';
4058 bool IsMaskZ = Name[11] == 'z';
4059 // Drop the "avx512.mask." prefix to simplify the indexing below.
4060 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4061 bool IsSubAdd = Name[3] == 's';
4062 if (CI->arg_size() == 5) {
4063 Intrinsic::ID IID;
4064 // Check the character before ".512" in string.
4065 if (Name[Name.size() - 5] == 's')
4066 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4067 else
4068 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4069
4070 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4071 CI->getArgOperand(2), CI->getArgOperand(4)};
4072 if (IsSubAdd)
4073 Ops[2] = Builder.CreateFNeg(Ops[2]);
4074
4075 Rep = Builder.CreateIntrinsic(IID, Ops);
4076 } else {
4077 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4078
4079 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4080 CI->getArgOperand(2)};
4081
4082 Function *FMA = Intrinsic::getOrInsertDeclaration(
4083 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4084 Value *Odd = Builder.CreateCall(FMA, Ops);
4085 Ops[2] = Builder.CreateFNeg(Ops[2]);
4086 Value *Even = Builder.CreateCall(FMA, Ops);
4087
4088 if (IsSubAdd)
4089 std::swap(Even, Odd);
4090
4091 SmallVector<int, 32> Idxs(NumElts);
4092 for (int i = 0; i != NumElts; ++i)
4093 Idxs[i] = i + (i % 2) * NumElts;
4094
4095 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
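// The interleave yields fma(A, B, -C) in even lanes and fma(A, B, C) in odd
// lanes (swapped for subadd). E.g. with 4 elements the mask is <0, 5, 2, 7>,
// taking even lanes from Even and odd lanes from Odd.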
4096 }
4097
4098 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4099 : IsMask3 ? CI->getArgOperand(2)
4100 : CI->getArgOperand(0);
4101
4102 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4103 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4104 Name.starts_with("avx512.maskz.pternlog.")) {
4105 bool ZeroMask = Name[11] == 'z';
4106 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4107 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4108 Intrinsic::ID IID;
4109 if (VecWidth == 128 && EltWidth == 32)
4110 IID = Intrinsic::x86_avx512_pternlog_d_128;
4111 else if (VecWidth == 256 && EltWidth == 32)
4112 IID = Intrinsic::x86_avx512_pternlog_d_256;
4113 else if (VecWidth == 512 && EltWidth == 32)
4114 IID = Intrinsic::x86_avx512_pternlog_d_512;
4115 else if (VecWidth == 128 && EltWidth == 64)
4116 IID = Intrinsic::x86_avx512_pternlog_q_128;
4117 else if (VecWidth == 256 && EltWidth == 64)
4118 IID = Intrinsic::x86_avx512_pternlog_q_256;
4119 else if (VecWidth == 512 && EltWidth == 64)
4120 IID = Intrinsic::x86_avx512_pternlog_q_512;
4121 else
4122 llvm_unreachable("Unexpected intrinsic");
4123
4124 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4125 CI->getArgOperand(2), CI->getArgOperand(3)};
4126 Rep = Builder.CreateIntrinsic(IID, Args);
4127 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4128 : CI->getArgOperand(0);
4129 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4130 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4131 Name.starts_with("avx512.maskz.vpmadd52")) {
4132 bool ZeroMask = Name[11] == 'z';
4133 bool High = Name[20] == 'h' || Name[21] == 'h';
4134 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4135 Intrinsic::ID IID;
4136 if (VecWidth == 128 && !High)
4137 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4138 else if (VecWidth == 256 && !High)
4139 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4140 else if (VecWidth == 512 && !High)
4141 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4142 else if (VecWidth == 128 && High)
4143 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4144 else if (VecWidth == 256 && High)
4145 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4146 else if (VecWidth == 512 && High)
4147 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4148 else
4149 llvm_unreachable("Unexpected intrinsic");
4150
4151 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4152 CI->getArgOperand(2)};
4153 Rep = Builder.CreateIntrinsic(IID, Args);
4154 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4155 : CI->getArgOperand(0);
4156 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4157 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4158 Name.starts_with("avx512.mask.vpermt2var.") ||
4159 Name.starts_with("avx512.maskz.vpermt2var.")) {
4160 bool ZeroMask = Name[11] == 'z';
4161 bool IndexForm = Name[17] == 'i';
4162 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4163 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4164 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4165 Name.starts_with("avx512.mask.vpdpbusds.") ||
4166 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4167 bool ZeroMask = Name[11] == 'z';
4168 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4169 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4170 Intrinsic::ID IID;
4171 if (VecWidth == 128 && !IsSaturating)
4172 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4173 else if (VecWidth == 256 && !IsSaturating)
4174 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4175 else if (VecWidth == 512 && !IsSaturating)
4176 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4177 else if (VecWidth == 128 && IsSaturating)
4178 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4179 else if (VecWidth == 256 && IsSaturating)
4180 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4181 else if (VecWidth == 512 && IsSaturating)
4182 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4183 else
4184 llvm_unreachable("Unexpected intrinsic");
4185
4186 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4187 CI->getArgOperand(2)};
4188
4189 // Input argument types were incorrectly set to vectors of i32 before, but
4190 // they should be vectors of i8. Insert a bitcast when encountering the old
4191 // types.
4192 if (Args[1]->getType()->isVectorTy() &&
4193 cast<VectorType>(Args[1]->getType())
4194 ->getElementType()
4195 ->isIntegerTy(32) &&
4196 Args[2]->getType()->isVectorTy() &&
4197 cast<VectorType>(Args[2]->getType())
4198 ->getElementType()
4199 ->isIntegerTy(32)) {
4200 Type *NewArgType = nullptr;
4201 if (VecWidth == 128)
4202 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4203 else if (VecWidth == 256)
4204 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4205 else if (VecWidth == 512)
4206 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4207 else
4208 llvm_unreachable("Unexpected vector bit width");
4209
4210 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4211 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4212 }
4213
4214 Rep = Builder.CreateIntrinsic(IID, Args);
4215 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4216 : CI->getArgOperand(0);
4217 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4218 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4219 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4220 Name.starts_with("avx512.mask.vpdpwssds.") ||
4221 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4222 bool ZeroMask = Name[11] == 'z';
4223 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4224 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4225 Intrinsic::ID IID;
4226 if (VecWidth == 128 && !IsSaturating)
4227 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4228 else if (VecWidth == 256 && !IsSaturating)
4229 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4230 else if (VecWidth == 512 && !IsSaturating)
4231 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4232 else if (VecWidth == 128 && IsSaturating)
4233 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4234 else if (VecWidth == 256 && IsSaturating)
4235 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4236 else if (VecWidth == 512 && IsSaturating)
4237 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4238 else
4239 llvm_unreachable("Unexpected intrinsic");
4240
4241 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4242 CI->getArgOperand(2)};
4243 Rep = Builder.CreateIntrinsic(IID, Args);
4244 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4245 : CI->getArgOperand(0);
4246 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4247 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4248 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4249 Name == "subborrow.u32" || Name == "subborrow.u64") {
4250 Intrinsic::ID IID;
4251 if (Name[0] == 'a' && Name.back() == '2')
4252 IID = Intrinsic::x86_addcarry_32;
4253 else if (Name[0] == 'a' && Name.back() == '4')
4254 IID = Intrinsic::x86_addcarry_64;
4255 else if (Name[0] == 's' && Name.back() == '2')
4256 IID = Intrinsic::x86_subborrow_32;
4257 else if (Name[0] == 's' && Name.back() == '4')
4258 IID = Intrinsic::x86_subborrow_64;
4259 else
4260 llvm_unreachable("Unexpected intrinsic");
4261
4262 // Make a call with 3 operands.
4263 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4264 CI->getArgOperand(2)};
4265 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4266
4267 // Extract the second result and store it.
4268 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4269 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4270 // Replace the original call result with the first result of the new call.
4271 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4272
4273 CI->replaceAllUsesWith(CF);
4274 Rep = nullptr;
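// Illustrative upgrade (value names are arbitrary): "addcarry.u32" becomes
//   %res = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
//   %sum = extractvalue { i8, i32 } %res, 1
//   store i32 %sum, ptr %p, align 1
//   %cf  = extractvalue { i8, i32 } %res, 0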
4275 } else if (Name.starts_with("avx512.mask.") &&
4276 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4277 // Rep will be updated by the call in the condition.
4278 }
4279
4280 return Rep;
4281}
4282
4283 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4284 Function *F, IRBuilder<> &Builder) {
4285 if (Name.starts_with("neon.bfcvt")) {
4286 if (Name.starts_with("neon.bfcvtn2")) {
4287 SmallVector<int, 32> LoMask(4);
4288 std::iota(LoMask.begin(), LoMask.end(), 0);
4289 SmallVector<int, 32> ConcatMask(8);
4290 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4291 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4292 Value *Trunc =
4293 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4294 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4295 } else if (Name.starts_with("neon.bfcvtn")) {
4296 SmallVector<int, 32> ConcatMask(8);
4297 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4298 Type *V4BF16 =
4299 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4300 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4302 return Builder.CreateShuffleVector(
4303 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4304 } else {
4305 return Builder.CreateFPTrunc(CI->getOperand(0),
4306 Type::getBFloatTy(F->getContext()));
4307 }
4308 } else if (Name.starts_with("sve.fcvt")) {
4309 Intrinsic::ID NewID =
4310 StringSwitch<Intrinsic::ID>(Name)
4311 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4312 .Case("sve.fcvtnt.bf16f32",
4313 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4314 .Default(Intrinsic::not_intrinsic);
4315 if (NewID == Intrinsic::not_intrinsic)
4316 llvm_unreachable("Unhandled Intrinsic!");
4317
4318 SmallVector<Value *, 3> Args(CI->args());
4319
4320 // The original intrinsics incorrectly used a predicate based on the
4321 // smallest element type rather than the largest.
4322 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4323 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4324
4325 if (Args[1]->getType() != BadPredTy)
4326 llvm_unreachable("Unexpected predicate type!");
4327
4328 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4329 BadPredTy, Args[1]);
4330 Args[1] = Builder.CreateIntrinsic(
4331 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4332
4333 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4334 CI->getName());
4335 }
4336
4337 llvm_unreachable("Unhandled Intrinsic!");
4338}
4339
4340 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4341 IRBuilder<> &Builder) {
4342 if (Name == "mve.vctp64.old") {
4343 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4344 // correct type.
4345 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4346 CI->getArgOperand(0),
4347 /*FMFSource=*/nullptr, CI->getName());
4348 Value *C1 = Builder.CreateIntrinsic(
4349 Intrinsic::arm_mve_pred_v2i,
4350 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4351 return Builder.CreateIntrinsic(
4352 Intrinsic::arm_mve_pred_i2v,
4353 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4354 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4355 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4356 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4357 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4358 Name ==
4359 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4360 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4361 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4362 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4363 Name ==
4364 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4365 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4366 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4367 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4368 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4369 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4370 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4371 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4372 std::vector<Type *> Tys;
4373 unsigned ID = CI->getIntrinsicID();
4374 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4375 switch (ID) {
4376 case Intrinsic::arm_mve_mull_int_predicated:
4377 case Intrinsic::arm_mve_vqdmull_predicated:
4378 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4379 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4380 break;
4381 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4382 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4383 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4384 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4385 V2I1Ty};
4386 break;
4387 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4388 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4389 CI->getOperand(1)->getType(), V2I1Ty};
4390 break;
4391 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4392 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4393 CI->getOperand(2)->getType(), V2I1Ty};
4394 break;
4395 case Intrinsic::arm_cde_vcx1q_predicated:
4396 case Intrinsic::arm_cde_vcx1qa_predicated:
4397 case Intrinsic::arm_cde_vcx2q_predicated:
4398 case Intrinsic::arm_cde_vcx2qa_predicated:
4399 case Intrinsic::arm_cde_vcx3q_predicated:
4400 case Intrinsic::arm_cde_vcx3qa_predicated:
4401 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4402 break;
4403 default:
4404 llvm_unreachable("Unhandled Intrinsic!");
4405 }
4406
4407 std::vector<Value *> Ops;
4408 for (Value *Op : CI->args()) {
4409 Type *Ty = Op->getType();
4410 if (Ty->getScalarSizeInBits() == 1) {
4411 Value *C1 = Builder.CreateIntrinsic(
4412 Intrinsic::arm_mve_pred_v2i,
4413 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4414 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4415 }
4416 Ops.push_back(Op);
4417 }
4418
4419 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4420 CI->getName());
4421 }
4422 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4423}
4424
4425// These are expected to have the arguments:
4426// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4427//
4428// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4429//
 4430static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
 4431                                         Function *F, IRBuilder<> &Builder) {
 4432 AtomicRMWInst::BinOp RMWOp =
 4433 StringSwitch<AtomicRMWInst::BinOp>(Name)
 4434 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4435 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4436 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4437 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4438 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4439 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4440 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4441 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4442 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4443 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4444 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4445
4446 unsigned NumOperands = CI->getNumOperands();
4447 if (NumOperands < 3) // Malformed bitcode.
4448 return nullptr;
4449
4450 Value *Ptr = CI->getArgOperand(0);
4451 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4452 if (!PtrTy) // Malformed.
4453 return nullptr;
4454
4455 Value *Val = CI->getArgOperand(1);
4456 if (Val->getType() != CI->getType()) // Malformed.
4457 return nullptr;
4458
4459 ConstantInt *OrderArg = nullptr;
4460 bool IsVolatile = false;
4461
4462 // These should have 5 arguments (plus the callee). A separate version of the
4463 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4464 if (NumOperands > 3)
4465 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4466
4467 // Ignore scope argument at 3
4468
4469 if (NumOperands > 5) {
4470 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4471 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4472 }
4473
 4474 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
 4475 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
 4476 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
 4477 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
 4478 Order = AtomicOrdering::SequentiallyConsistent;
 4479
4480 LLVMContext &Ctx = F->getContext();
4481
4482 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4483 Type *RetTy = CI->getType();
4484 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4485 if (VT->getElementType()->isIntegerTy(16)) {
4486 VectorType *AsBF16 =
4487 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4488 Val = Builder.CreateBitCast(Val, AsBF16);
4489 }
4490 }
4491
4492 // The scope argument never really worked correctly. Use agent as the most
4493 // conservative option which should still always produce the instruction.
4494 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4495 AtomicRMWInst *RMW =
4496 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4497
4498 unsigned AddrSpace = PtrTy->getAddressSpace();
4499 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4500 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4501 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4502 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4503 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4504 }
4505
4506 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4507 MDBuilder MDB(F->getContext());
 4509 MDNode *RangeNotPrivate =
 4510 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
 4511 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4512 }
4513
4514 if (IsVolatile)
4515 RMW->setVolatile(true);
4516
4517 return Builder.CreateBitCast(RMW, RetTy);
4518}
4519
4520/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4521/// plain MDNode, as it's the verifier's job to check these are the correct
4522/// types later.
4523static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4524 if (Op < CI->arg_size()) {
 4525 if (MetadataAsValue *MAV =
 4526 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
 4527 Metadata *MD = MAV->getMetadata();
4528 return dyn_cast_if_present<MDNode>(MD);
4529 }
4530 }
4531 return nullptr;
4532}
4533
4534/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4535static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4536 if (Op < CI->arg_size())
 4537 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
 4538 return MAV->getMetadata();
4539 return nullptr;
4540}
4541
 4542static MDNode *getDebugLocSafe(const Instruction *I) {
 4543 // The MDNode attached to this instruction might not be the correct type,
 4544 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4545 return I->getDebugLoc().getAsMDNode();
4546}
4547
4548/// Convert debug intrinsic calls to non-instruction debug records.
4549/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4550/// \p CI - The debug intrinsic call.
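/// For example (sketch only; the exact printed form depends on the attached
/// location), the intrinsic form
///   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
///                             metadata !DIExpression())
/// is rebuilt as the equivalent non-instruction record, printed as
///   #dbg_value(i32 %x, !10, !DIExpression(), !dbgloc)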
 4551static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
 4552 DbgRecord *DR = nullptr;
4553 if (Name == "label") {
4555 CI->getDebugLoc());
4556 } else if (Name == "assign") {
4559 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4560 unwrapMAVMetadataOp(CI, 4),
4561 /*The address is a Value ref, it will be stored as a Metadata */
4562 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4563 } else if (Name == "declare") {
4566 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4567 getDebugLocSafe(CI));
4568 } else if (Name == "addr") {
4569 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4570 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4571 // Don't try to add something to the expression if it's not an expression.
4572 // Instead, allow the verifier to fail later.
 4573 if (DIExpression *Expr = dyn_cast_or_null<DIExpression>(ExprNode)) {
4574 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
 4575 }
 4576 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
 4577 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
 4578 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4579 getDebugLocSafe(CI));
4580 } else if (Name == "value") {
4581 // An old version of dbg.value had an extra offset argument.
4582 unsigned VarOp = 1;
4583 unsigned ExprOp = 2;
 4584 if (CI->arg_size() == 4) {
 4585 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
 4586 // Nonzero offset dbg.values get dropped without a replacement.
4587 if (!Offset || !Offset->isZeroValue())
4588 return;
4589 VarOp = 2;
4590 ExprOp = 3;
 4591 }
 4592 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
 4593 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
 4594 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4595 nullptr, getDebugLocSafe(CI));
4596 }
4597 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4598 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4599}
4600
4601/// Upgrade a call to an old intrinsic. All argument and return casting must be
4602/// provided to seamlessly integrate with existing context.
 4603void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
 4604 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4605 // checks the callee's function type matches. It's likely we need to handle
4606 // type changes here.
 4607 Function *F = dyn_cast<Function>(CI->getCalledOperand());
 4608 if (!F)
4609 return;
4610
4611 LLVMContext &C = CI->getContext();
4612 IRBuilder<> Builder(C);
4613 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4614
4615 if (!NewFn) {
4616 // Get the Function's name.
4617 StringRef Name = F->getName();
4618
4619 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4620 Name = Name.substr(5);
4621
4622 bool IsX86 = Name.consume_front("x86.");
4623 bool IsNVVM = Name.consume_front("nvvm.");
4624 bool IsAArch64 = Name.consume_front("aarch64.");
4625 bool IsARM = Name.consume_front("arm.");
4626 bool IsAMDGCN = Name.consume_front("amdgcn.");
4627 bool IsDbg = Name.consume_front("dbg.");
4628 Value *Rep = nullptr;
4629
4630 if (!IsX86 && Name == "stackprotectorcheck") {
4631 Rep = nullptr;
4632 } else if (IsNVVM) {
4633 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4634 } else if (IsX86) {
4635 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4636 } else if (IsAArch64) {
4637 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4638 } else if (IsARM) {
4639 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4640 } else if (IsAMDGCN) {
4641 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
 4642 } else if (IsDbg) {
 4643 upgradeDbgIntrinsicToDbgRecord(Name, CI);
 4644 } else {
4645 llvm_unreachable("Unknown function for CallBase upgrade.");
4646 }
4647
4648 if (Rep)
4649 CI->replaceAllUsesWith(Rep);
4650 CI->eraseFromParent();
4651 return;
4652 }
4653
4654 const auto &DefaultCase = [&]() -> void {
4655 if (F == NewFn)
4656 return;
4657
4658 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4659 // Handle generic mangling change.
4660 assert(
4661 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4662 "Unknown function for CallBase upgrade and isn't just a name change");
4663 CI->setCalledFunction(NewFn);
4664 return;
4665 }
4666
4667 // This must be an upgrade from a named to a literal struct.
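 // e.g. an intrinsic that used to return %pair = type { i32, i32 } now
 // returns the literal { i32, i32 }; the result is re-packed element by
 // element below so users of the old named-struct value keep working.
 // (Illustrative type name only.)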
4668 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4669 assert(OldST != NewFn->getReturnType() &&
4670 "Return type must have changed");
4671 assert(OldST->getNumElements() ==
4672 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4673 "Must have same number of elements");
4674
4675 SmallVector<Value *> Args(CI->args());
4676 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4677 NewCI->setAttributes(CI->getAttributes());
4678 Value *Res = PoisonValue::get(OldST);
4679 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4680 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4681 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4682 }
4683 CI->replaceAllUsesWith(Res);
4684 CI->eraseFromParent();
4685 return;
4686 }
4687
4688 // We're probably about to produce something invalid. Let the verifier catch
4689 // it instead of dying here.
 4690 CI->setCalledOperand(
 4691 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
 4692 return;
4693 };
4694 CallInst *NewCall = nullptr;
4695 switch (NewFn->getIntrinsicID()) {
4696 default: {
4697 DefaultCase();
4698 return;
4699 }
4700 case Intrinsic::arm_neon_vst1:
4701 case Intrinsic::arm_neon_vst2:
4702 case Intrinsic::arm_neon_vst3:
4703 case Intrinsic::arm_neon_vst4:
4704 case Intrinsic::arm_neon_vst2lane:
4705 case Intrinsic::arm_neon_vst3lane:
4706 case Intrinsic::arm_neon_vst4lane: {
4707 SmallVector<Value *, 4> Args(CI->args());
4708 NewCall = Builder.CreateCall(NewFn, Args);
4709 break;
4710 }
4711 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4712 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4713 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4714 LLVMContext &Ctx = F->getParent()->getContext();
4715 SmallVector<Value *, 4> Args(CI->args());
4716 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4717 cast<ConstantInt>(Args[3])->getZExtValue());
4718 NewCall = Builder.CreateCall(NewFn, Args);
4719 break;
4720 }
4721 case Intrinsic::aarch64_sve_ld3_sret:
4722 case Intrinsic::aarch64_sve_ld4_sret:
4723 case Intrinsic::aarch64_sve_ld2_sret: {
4724 StringRef Name = F->getName();
4725 Name = Name.substr(5);
4726 unsigned N = StringSwitch<unsigned>(Name)
4727 .StartsWith("aarch64.sve.ld2", 2)
4728 .StartsWith("aarch64.sve.ld3", 3)
4729 .StartsWith("aarch64.sve.ld4", 4)
4730 .Default(0);
4731 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4732 unsigned MinElts = RetTy->getMinNumElements() / N;
4733 SmallVector<Value *, 2> Args(CI->args());
4734 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4735 Value *Ret = llvm::PoisonValue::get(RetTy);
4736 for (unsigned I = 0; I < N; I++) {
4737 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4738 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4739 }
4740 NewCall = dyn_cast<CallInst>(Ret);
4741 break;
4742 }
4743
4744 case Intrinsic::coro_end: {
4745 SmallVector<Value *, 3> Args(CI->args());
4746 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4747 NewCall = Builder.CreateCall(NewFn, Args);
4748 break;
4749 }
4750
4751 case Intrinsic::vector_extract: {
4752 StringRef Name = F->getName();
4753 Name = Name.substr(5); // Strip llvm
4754 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4755 DefaultCase();
4756 return;
4757 }
4758 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4759 unsigned MinElts = RetTy->getMinNumElements();
4760 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4761 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4762 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4763 break;
4764 }
4765
4766 case Intrinsic::vector_insert: {
4767 StringRef Name = F->getName();
4768 Name = Name.substr(5);
4769 if (!Name.starts_with("aarch64.sve.tuple")) {
4770 DefaultCase();
4771 return;
4772 }
4773 if (Name.starts_with("aarch64.sve.tuple.set")) {
4774 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4775 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4776 Value *NewIdx =
4777 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4778 NewCall = Builder.CreateCall(
4779 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4780 break;
4781 }
4782 if (Name.starts_with("aarch64.sve.tuple.create")) {
4783 unsigned N = StringSwitch<unsigned>(Name)
4784 .StartsWith("aarch64.sve.tuple.create2", 2)
4785 .StartsWith("aarch64.sve.tuple.create3", 3)
4786 .StartsWith("aarch64.sve.tuple.create4", 4)
4787 .Default(0);
4788 assert(N > 1 && "Create is expected to be between 2-4");
4789 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4790 Value *Ret = llvm::PoisonValue::get(RetTy);
4791 unsigned MinElts = RetTy->getMinNumElements() / N;
4792 for (unsigned I = 0; I < N; I++) {
4793 Value *V = CI->getArgOperand(I);
4794 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4795 }
4796 NewCall = dyn_cast<CallInst>(Ret);
4797 }
4798 break;
4799 }
4800
4801 case Intrinsic::arm_neon_bfdot:
4802 case Intrinsic::arm_neon_bfmmla:
4803 case Intrinsic::arm_neon_bfmlalb:
4804 case Intrinsic::arm_neon_bfmlalt:
4805 case Intrinsic::aarch64_neon_bfdot:
4806 case Intrinsic::aarch64_neon_bfmmla:
4807 case Intrinsic::aarch64_neon_bfmlalb:
 4808 case Intrinsic::aarch64_neon_bfmlalt: {
 4809 SmallVector<Value *, 3> Args;
 4810 assert(CI->arg_size() == 3 &&
4811 "Mismatch between function args and call args");
 4812 size_t OperandWidth =
 4813 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
 4814 assert((OperandWidth == 64 || OperandWidth == 128) &&
4815 "Unexpected operand width");
4816 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4817 auto Iter = CI->args().begin();
4818 Args.push_back(*Iter++);
4819 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4820 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4821 NewCall = Builder.CreateCall(NewFn, Args);
4822 break;
4823 }
4824
4825 case Intrinsic::bitreverse:
4826 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4827 break;
4828
4829 case Intrinsic::ctlz:
4830 case Intrinsic::cttz:
4831 assert(CI->arg_size() == 1 &&
4832 "Mismatch between function args and call args");
4833 NewCall =
4834 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4835 break;
4836
4837 case Intrinsic::objectsize: {
4838 Value *NullIsUnknownSize =
4839 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4840 Value *Dynamic =
4841 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4842 NewCall = Builder.CreateCall(
4843 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4844 break;
4845 }
4846
4847 case Intrinsic::ctpop:
4848 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4849 break;
4850
4851 case Intrinsic::convert_from_fp16:
4852 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4853 break;
4854
4855 case Intrinsic::dbg_value: {
4856 StringRef Name = F->getName();
4857 Name = Name.substr(5); // Strip llvm.
4858 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4859 if (Name.starts_with("dbg.addr")) {
4861 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4862 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4863 NewCall =
4864 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4865 MetadataAsValue::get(C, Expr)});
4866 break;
4867 }
4868
4869 // Upgrade from the old version that had an extra offset argument.
4870 assert(CI->arg_size() == 4);
 4871 // Drop nonzero offsets instead of attempting to upgrade them.
 4872 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
 4873 if (Offset->isZeroValue()) {
4874 NewCall = Builder.CreateCall(
4875 NewFn,
4876 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4877 break;
4878 }
4879 CI->eraseFromParent();
4880 return;
4881 }
4882
4883 case Intrinsic::ptr_annotation:
4884 // Upgrade from versions that lacked the annotation attribute argument.
4885 if (CI->arg_size() != 4) {
4886 DefaultCase();
4887 return;
4888 }
4889
4890 // Create a new call with an added null annotation attribute argument.
4891 NewCall = Builder.CreateCall(
4892 NewFn,
4893 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4894 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4895 NewCall->takeName(CI);
4896 CI->replaceAllUsesWith(NewCall);
4897 CI->eraseFromParent();
4898 return;
4899
4900 case Intrinsic::var_annotation:
4901 // Upgrade from versions that lacked the annotation attribute argument.
4902 if (CI->arg_size() != 4) {
4903 DefaultCase();
4904 return;
4905 }
4906 // Create a new call with an added null annotation attribute argument.
4907 NewCall = Builder.CreateCall(
4908 NewFn,
4909 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4910 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4911 NewCall->takeName(CI);
4912 CI->replaceAllUsesWith(NewCall);
4913 CI->eraseFromParent();
4914 return;
4915
4916 case Intrinsic::riscv_aes32dsi:
4917 case Intrinsic::riscv_aes32dsmi:
4918 case Intrinsic::riscv_aes32esi:
4919 case Intrinsic::riscv_aes32esmi:
4920 case Intrinsic::riscv_sm4ks:
4921 case Intrinsic::riscv_sm4ed: {
4922 // The last argument to these intrinsics used to be i8 and changed to i32.
4923 // The type overload for sm4ks and sm4ed was removed.
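 // Sketch of the RV64 case (hypothetical values):
 //   %r = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 2)
 // becomes, roughly:
 //   %a32 = trunc i64 %a to i32
 //   %b32 = trunc i64 %b to i32
 //   %r32 = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
 //   %r   = sext i32 %r32 to i64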
4924 Value *Arg2 = CI->getArgOperand(2);
4925 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4926 return;
4927
4928 Value *Arg0 = CI->getArgOperand(0);
4929 Value *Arg1 = CI->getArgOperand(1);
4930 if (CI->getType()->isIntegerTy(64)) {
4931 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4932 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4933 }
4934
4935 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4936 cast<ConstantInt>(Arg2)->getZExtValue());
4937
4938 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4939 Value *Res = NewCall;
4940 if (Res->getType() != CI->getType())
4941 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4942 NewCall->takeName(CI);
4943 CI->replaceAllUsesWith(Res);
4944 CI->eraseFromParent();
4945 return;
4946 }
4947 case Intrinsic::nvvm_mapa_shared_cluster: {
4948 // Create a new call with the correct address space.
4949 NewCall =
4950 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
4951 Value *Res = NewCall;
4952 Res = Builder.CreateAddrSpaceCast(
4953 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
4954 NewCall->takeName(CI);
4955 CI->replaceAllUsesWith(Res);
4956 CI->eraseFromParent();
4957 return;
4958 }
4959 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4960 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4961 // Create a new call with the correct address space.
4962 SmallVector<Value *, 4> Args(CI->args());
4963 Args[0] = Builder.CreateAddrSpaceCast(
4964 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4965
4966 NewCall = Builder.CreateCall(NewFn, Args);
4967 NewCall->takeName(CI);
4968 CI->replaceAllUsesWith(NewCall);
4969 CI->eraseFromParent();
4970 return;
4971 }
4972 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4973 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4974 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4975 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4976 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4977 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4978 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4979 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4980 SmallVector<Value *, 16> Args(CI->args());
4981
4982 // Create AddrSpaceCast to shared_cluster if needed.
4983 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
 4984 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
 4985 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
 4986 Args[0] = Builder.CreateAddrSpaceCast(
4987 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4988
4989 // Attach the flag argument for cta_group, with a
4990 // default value of 0. This handles case (2) in
4991 // shouldUpgradeNVPTXTMAG2SIntrinsics().
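 // Illustrative intent (hypothetical operand layout): if the operand three
 // from the end is not already the i1 flag of the new signature, this is an
 // old-style call, so a trailing "i32 0" cta_group operand is appended and
 // everything else is passed through unchanged.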
4992 size_t NumArgs = CI->arg_size();
4993 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
4994 if (!FlagArg->getType()->isIntegerTy(1))
4995 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
4996
4997 NewCall = Builder.CreateCall(NewFn, Args);
4998 NewCall->takeName(CI);
4999 CI->replaceAllUsesWith(NewCall);
5000 CI->eraseFromParent();
5001 return;
5002 }
5003 case Intrinsic::riscv_sha256sig0:
5004 case Intrinsic::riscv_sha256sig1:
5005 case Intrinsic::riscv_sha256sum0:
5006 case Intrinsic::riscv_sha256sum1:
5007 case Intrinsic::riscv_sm3p0:
5008 case Intrinsic::riscv_sm3p1: {
 5009 // These intrinsics used to have an additional i64 type overload on RV64;
 5010 // it was removed, so truncate the argument and sign-extend the result.
5011 if (!CI->getType()->isIntegerTy(64))
5012 return;
5013
5014 Value *Arg =
5015 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5016
5017 NewCall = Builder.CreateCall(NewFn, Arg);
5018 Value *Res =
5019 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5020 NewCall->takeName(CI);
5021 CI->replaceAllUsesWith(Res);
5022 CI->eraseFromParent();
5023 return;
5024 }
5025
5026 case Intrinsic::x86_xop_vfrcz_ss:
5027 case Intrinsic::x86_xop_vfrcz_sd:
5028 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5029 break;
5030
5031 case Intrinsic::x86_xop_vpermil2pd:
5032 case Intrinsic::x86_xop_vpermil2ps:
5033 case Intrinsic::x86_xop_vpermil2pd_256:
5034 case Intrinsic::x86_xop_vpermil2ps_256: {
5035 SmallVector<Value *, 4> Args(CI->args());
5036 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5037 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5038 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5039 NewCall = Builder.CreateCall(NewFn, Args);
5040 break;
5041 }
5042
5043 case Intrinsic::x86_sse41_ptestc:
5044 case Intrinsic::x86_sse41_ptestz:
5045 case Intrinsic::x86_sse41_ptestnzc: {
5046 // The arguments for these intrinsics used to be v4f32, and changed
5047 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5048 // So, the only thing required is a bitcast for both arguments.
5049 // First, check the arguments have the old type.
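 // Sketch of the rewrite:
 //   %t = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
 // becomes
 //   %c0 = bitcast <4 x float> %a to <2 x i64>
 //   %c1 = bitcast <4 x float> %b to <2 x i64>
 //   %t  = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c0, <2 x i64> %c1)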
5050 Value *Arg0 = CI->getArgOperand(0);
5051 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5052 return;
5053
5054 // Old intrinsic, add bitcasts
5055 Value *Arg1 = CI->getArgOperand(1);
5056
5057 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5058
5059 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5060 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5061
5062 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5063 break;
5064 }
5065
5066 case Intrinsic::x86_rdtscp: {
 5067 // This used to take 1 argument. If we have no arguments, it is already
 5068 // upgraded.
 5069 if (CI->arg_size() == 0)
5070 return;
5071
5072 NewCall = Builder.CreateCall(NewFn);
5073 // Extract the second result and store it.
5074 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5075 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5076 // Replace the original call result with the first result of the new call.
5077 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5078
5079 NewCall->takeName(CI);
5080 CI->replaceAllUsesWith(TSC);
5081 CI->eraseFromParent();
5082 return;
5083 }
5084
5085 case Intrinsic::x86_sse41_insertps:
5086 case Intrinsic::x86_sse41_dppd:
5087 case Intrinsic::x86_sse41_dpps:
5088 case Intrinsic::x86_sse41_mpsadbw:
5089 case Intrinsic::x86_avx_dp_ps_256:
5090 case Intrinsic::x86_avx2_mpsadbw: {
5091 // Need to truncate the last argument from i32 to i8 -- this argument models
5092 // an inherently 8-bit immediate operand to these x86 instructions.
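 // e.g. (hypothetical immediate):
 //   @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i32 16)
 // -> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i8 16)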
5093 SmallVector<Value *, 4> Args(CI->args());
5094
5095 // Replace the last argument with a trunc.
5096 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5097 NewCall = Builder.CreateCall(NewFn, Args);
5098 break;
5099 }
5100
5101 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5102 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5103 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5104 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5105 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5106 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5107 SmallVector<Value *, 4> Args(CI->args());
5108 unsigned NumElts =
5109 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5110 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5111
5112 NewCall = Builder.CreateCall(NewFn, Args);
5113 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5114
5115 NewCall->takeName(CI);
5116 CI->replaceAllUsesWith(Res);
5117 CI->eraseFromParent();
5118 return;
5119 }
5120
5121 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5122 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5123 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5124 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5125 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5126 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5127 SmallVector<Value *, 4> Args(CI->args());
5128 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5129 if (NewFn->getIntrinsicID() ==
5130 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5131 Args[1] = Builder.CreateBitCast(
5132 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5133
5134 NewCall = Builder.CreateCall(NewFn, Args);
5135 Value *Res = Builder.CreateBitCast(
5136 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5137
5138 NewCall->takeName(CI);
5139 CI->replaceAllUsesWith(Res);
5140 CI->eraseFromParent();
5141 return;
5142 }
5143 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5144 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5145 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5146 SmallVector<Value *, 4> Args(CI->args());
5147 unsigned NumElts =
5148 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5149 Args[1] = Builder.CreateBitCast(
5150 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5151 Args[2] = Builder.CreateBitCast(
5152 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5153
5154 NewCall = Builder.CreateCall(NewFn, Args);
5155 break;
5156 }
5157
5158 case Intrinsic::thread_pointer: {
5159 NewCall = Builder.CreateCall(NewFn, {});
5160 break;
5161 }
5162
5163 case Intrinsic::memcpy:
5164 case Intrinsic::memmove:
5165 case Intrinsic::memset: {
5166 // We have to make sure that the call signature is what we're expecting.
5167 // We only want to change the old signatures by removing the alignment arg:
5168 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5169 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5170 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5171 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5172 // Note: i8*'s in the above can be any pointer type
5173 if (CI->arg_size() != 5) {
5174 DefaultCase();
5175 return;
5176 }
5177 // Remove alignment argument (3), and add alignment attributes to the
5178 // dest/src pointers.
5179 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5180 CI->getArgOperand(2), CI->getArgOperand(4)};
5181 NewCall = Builder.CreateCall(NewFn, Args);
5182 AttributeList OldAttrs = CI->getAttributes();
5183 AttributeList NewAttrs = AttributeList::get(
5184 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5185 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5186 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5187 NewCall->setAttributes(NewAttrs);
5188 auto *MemCI = cast<MemIntrinsic>(NewCall);
 5189 // All mem intrinsics support dest alignment.
 5190 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
 5191 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5192 // Memcpy/Memmove also support source alignment.
5193 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5194 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5195 break;
5196 }
5197
5198 case Intrinsic::lifetime_start:
5199 case Intrinsic::lifetime_end: {
5200 if (CI->arg_size() != 2) {
5201 DefaultCase();
5202 return;
5203 }
5204
5205 Value *Ptr = CI->getArgOperand(1);
5206 // Try to strip pointer casts, such that the lifetime works on an alloca.
5207 Ptr = Ptr->stripPointerCasts();
5208 if (isa<AllocaInst>(Ptr)) {
5209 // Don't use NewFn, as we might have looked through an addrspacecast.
5210 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5211 NewCall = Builder.CreateLifetimeStart(Ptr);
5212 else
5213 NewCall = Builder.CreateLifetimeEnd(Ptr);
5214 break;
5215 }
5216
5217 // Otherwise remove the lifetime marker.
5218 CI->eraseFromParent();
5219 return;
5220 }
5221
5222 case Intrinsic::x86_avx512_vpdpbusd_128:
5223 case Intrinsic::x86_avx512_vpdpbusd_256:
5224 case Intrinsic::x86_avx512_vpdpbusd_512:
5225 case Intrinsic::x86_avx512_vpdpbusds_128:
5226 case Intrinsic::x86_avx512_vpdpbusds_256:
5227 case Intrinsic::x86_avx512_vpdpbusds_512: {
5228 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5229 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5230 CI->getArgOperand(2)};
5231 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5232 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5233 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5234
5235 NewCall = Builder.CreateCall(NewFn, Args);
5236 break;
5237 }
5238 }
5239 assert(NewCall && "Should have either set this variable or returned through "
5240 "the default case");
5241 NewCall->takeName(CI);
5242 CI->replaceAllUsesWith(NewCall);
5243 CI->eraseFromParent();
5244}
5245
 5246void llvm::UpgradeCallsToIntrinsic(Function *F) {
 5247 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5248
5249 // Check if this function should be upgraded and get the replacement function
5250 // if there is one.
5251 Function *NewFn;
5252 if (UpgradeIntrinsicFunction(F, NewFn)) {
5253 // Replace all users of the old function with the new function or new
5254 // instructions. This is not a range loop because the call is deleted.
5255 for (User *U : make_early_inc_range(F->users()))
5256 if (CallBase *CB = dyn_cast<CallBase>(U))
5257 UpgradeIntrinsicCall(CB, NewFn);
5258
5259 // Remove old function, no longer used, from the module.
5260 if (F != NewFn)
5261 F->eraseFromParent();
5262 }
5263}
5264
 5265MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
 5266 const unsigned NumOperands = MD.getNumOperands();
5267 if (NumOperands == 0)
5268 return &MD; // Invalid, punt to a verifier error.
5269
5270 // Check if the tag uses struct-path aware TBAA format.
5271 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5272 return &MD;
5273
5274 auto &Context = MD.getContext();
5275 if (NumOperands == 3) {
5276 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5277 MDNode *ScalarType = MDNode::get(Context, Elts);
5278 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
 5279 Metadata *Elts2[] = {ScalarType, ScalarType,
 5280 ConstantAsMetadata::get(
 5281 Constant::getNullValue(Type::getInt64Ty(Context))),
 5282 MD.getOperand(2)};
5283 return MDNode::get(Context, Elts2);
5284 }
 5285 // Create a MDNode <MD, MD, offset 0>
 5286 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
 5287 Type::getInt64Ty(Context)))};
5288 return MDNode::get(Context, Elts);
5289}
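// e.g. the old scalar tag !0 = !{!"int", !1} becomes the struct-path access
// tag !{!0, !0, i64 0}; the three-operand form additionally wraps a fresh
// scalar node and keeps its i64 immutability flag (sketch of the shapes
// involved).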
5290
 5291Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
 5292                                     Instruction *&Temp) {
5293 if (Opc != Instruction::BitCast)
5294 return nullptr;
5295
5296 Temp = nullptr;
5297 Type *SrcTy = V->getType();
5298 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5299 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5300 LLVMContext &Context = V->getContext();
5301
5302 // We have no information about target data layout, so we assume that
5303 // the maximum pointer size is 64bit.
5304 Type *MidTy = Type::getInt64Ty(Context);
5305 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5306
5307 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5308 }
5309
5310 return nullptr;
5311}
5312
 5313Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
 5314 if (Opc != Instruction::BitCast)
5315 return nullptr;
5316
5317 Type *SrcTy = C->getType();
5318 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5319 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5320 LLVMContext &Context = C->getContext();
5321
5322 // We have no information about target data layout, so we assume that
5323 // the maximum pointer size is 64bit.
5324 Type *MidTy = Type::getInt64Ty(Context);
5325
 5326 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
 5327 DestTy);
5328 }
5329
5330 return nullptr;
5331}
5332
5333/// Check the debug info version number, if it is out-dated, drop the debug
5334/// info. Return true if module is modified.
 5335bool llvm::UpgradeDebugInfo(Module &M) {
 5336 if (DisableAutoUpgradeDebugInfo)
 5337 return false;
5338
5339 llvm::TimeTraceScope timeScope("Upgrade debug info");
5340 // We need to get metadata before the module is verified (i.e., getModuleFlag
5341 // makes assumptions that we haven't verified yet). Carefully extract the flag
5342 // from the metadata.
5343 unsigned Version = 0;
5344 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5345 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5346 if (Flag->getNumOperands() < 3)
5347 return false;
5348 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5349 return K->getString() == "Debug Info Version";
5350 return false;
5351 });
5352 if (OpIt != ModFlags->op_end()) {
5353 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5354 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5355 Version = CI->getZExtValue();
5356 }
5357 }
5358
 5359 if (Version == DEBUG_METADATA_VERSION) {
 5360 bool BrokenDebugInfo = false;
5361 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5362 report_fatal_error("Broken module found, compilation aborted!");
5363 if (!BrokenDebugInfo)
5364 // Everything is ok.
5365 return false;
5366 else {
5367 // Diagnose malformed debug info.
 5368 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
 5369 M.getContext().diagnose(Diag);
5370 }
5371 }
5372 bool Modified = StripDebugInfo(M);
 5373 if (Modified && Version != DEBUG_METADATA_VERSION) {
 5374 // Diagnose a version mismatch.
 5375 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
 5376 M.getContext().diagnose(DiagVersion);
5377 }
5378 return Modified;
5379}
5380
5381static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5382 GlobalValue *GV, const Metadata *V) {
5383 Function *F = cast<Function>(GV);
5384
5385 constexpr StringLiteral DefaultValue = "1";
5386 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5387 unsigned Length = 0;
5388
5389 if (F->hasFnAttribute(Attr)) {
5390 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5391 // parse these elements placing them into Vect3
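 // e.g. an existing "nvvm.maxntid" value of "16,2" parsed here, combined
 // below with a metadata update of 4 for dim 'z', produces "16,2,4"
 // (hypothetical values).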
5392 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5393 for (; Length < 3 && !S.empty(); Length++) {
5394 auto [Part, Rest] = S.split(',');
5395 Vect3[Length] = Part.trim();
5396 S = Rest;
5397 }
5398 }
5399
5400 const unsigned Dim = DimC - 'x';
5401 assert(Dim < 3 && "Unexpected dim char");
5402
5403 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5404
 5405 // Local variable required so the StringRef in Vect3 has storage to point to.
5406 const std::string VStr = llvm::utostr(VInt);
5407 Vect3[Dim] = VStr;
5408 Length = std::max(Length, Dim + 1);
5409
5410 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5411 F->addFnAttr(Attr, NewAttr);
5412}
5413
5414static inline bool isXYZ(StringRef S) {
5415 return S == "x" || S == "y" || S == "z";
5416}
5417
 5418static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
 5419                                        const Metadata *V) {
5420 if (K == "kernel") {
 5421 if (!mdconst::extract<ConstantInt>(V)->isZero())
 5422 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5423 return true;
5424 }
5425 if (K == "align") {
 5426 // V is a bitfield specifying two 16-bit values. The alignment value is
 5427 // specified in the low 16 bits, and the index in the high bits. For the
5428 // index, 0 indicates the return value while higher values correspond to
5429 // each parameter (idx = param + 1).
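 // e.g. V = (2 << 16) | 8 (hypothetical) attaches stackalign(8) at
 // attribute index 2, i.e. to the function's second parameter.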
5430 const uint64_t AlignIdxValuePair =
5431 mdconst::extract<ConstantInt>(V)->getZExtValue();
5432 const unsigned Idx = (AlignIdxValuePair >> 16);
5433 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5434 cast<Function>(GV)->addAttributeAtIndex(
5435 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5436 return true;
5437 }
5438 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5439 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5440 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5441 return true;
5442 }
5443 if (K == "minctasm") {
5444 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5445 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5446 return true;
5447 }
5448 if (K == "maxnreg") {
5449 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5450 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5451 return true;
5452 }
5453 if (K.consume_front("maxntid") && isXYZ(K)) {
5454 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5455 return true;
5456 }
5457 if (K.consume_front("reqntid") && isXYZ(K)) {
5458 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5459 return true;
5460 }
5461 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5462 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5463 return true;
5464 }
5465 if (K == "grid_constant") {
5466 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5467 for (const auto &Op : cast<MDNode>(V)->operands()) {
5468 // For some reason, the index is 1-based in the metadata. Good thing we're
5469 // able to auto-upgrade it!
5470 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5471 cast<Function>(GV)->addParamAttr(Index, Attr);
5472 }
5473 return true;
5474 }
5475
5476 return false;
5477}
5478
 5479void llvm::UpgradeNVVMAnnotations(Module &M) {
 5480 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5481 if (!NamedMD)
5482 return;
5483
5484 SmallVector<MDNode *, 8> NewNodes;
 5485 SmallPtrSet<const MDNode *, 8> SeenNodes;
 5486 for (MDNode *MD : NamedMD->operands()) {
5487 if (!SeenNodes.insert(MD).second)
5488 continue;
5489
5490 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5491 if (!GV)
5492 continue;
5493
5494 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5495
5496 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5497 // Each nvvm.annotations metadata entry will be of the following form:
5498 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5499 // start index = 1, to skip the global variable key
5500 // increment = 2, to skip the value for each property-value pairs
5501 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5502 MDString *K = cast<MDString>(MD->getOperand(j));
5503 const MDOperand &V = MD->getOperand(j + 1);
5504 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5505 if (!Upgraded)
5506 NewOperands.append({K, V});
5507 }
5508
5509 if (NewOperands.size() > 1)
5510 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5511 }
5512
5513 NamedMD->clearOperands();
5514 for (MDNode *N : NewNodes)
5515 NamedMD->addOperand(N);
5516}
5517
5518/// This checks for objc retain release marker which should be upgraded. It
5519/// returns true if module is modified.
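/// e.g. a marker value of the form "instruction#comment" (hypothetical
/// string) is re-encoded as "instruction;comment" and moved into a module
/// flag of the same name.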
 5520static bool upgradeRetainReleaseMarker(Module &M) {
 5521 bool Changed = false;
5522 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5523 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5524 if (ModRetainReleaseMarker) {
5525 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5526 if (Op) {
5527 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5528 if (ID) {
5529 SmallVector<StringRef, 4> ValueComp;
5530 ID->getString().split(ValueComp, "#");
5531 if (ValueComp.size() == 2) {
5532 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5533 ID = MDString::get(M.getContext(), NewValue);
5534 }
5535 M.addModuleFlag(Module::Error, MarkerKey, ID);
5536 M.eraseNamedMetadata(ModRetainReleaseMarker);
5537 Changed = true;
5538 }
5539 }
5540 }
5541 return Changed;
5542}
5543
 5544void llvm::UpgradeARCRuntime(Module &M) {
 5545 // This lambda converts normal function calls to ARC runtime functions to
5546 // intrinsic calls.
5547 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5548 llvm::Intrinsic::ID IntrinsicFunc) {
5549 Function *Fn = M.getFunction(OldFunc);
5550
5551 if (!Fn)
5552 return;
5553
5554 Function *NewFn =
5555 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5556
5557 for (User *U : make_early_inc_range(Fn->users())) {
 5558 CallInst *CI = dyn_cast<CallInst>(U);
 5559 if (!CI || CI->getCalledFunction() != Fn)
5560 continue;
5561
5562 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5563 FunctionType *NewFuncTy = NewFn->getFunctionType();
 5564 SmallVector<Value *, 2> Args;
 5565
5566 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5567 // value to the return type of the old function.
5568 if (NewFuncTy->getReturnType() != CI->getType() &&
5569 !CastInst::castIsValid(Instruction::BitCast, CI,
5570 NewFuncTy->getReturnType()))
5571 continue;
5572
5573 bool InvalidCast = false;
5574
5575 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5576 Value *Arg = CI->getArgOperand(I);
5577
5578 // Bitcast argument to the parameter type of the new function if it's
5579 // not a variadic argument.
5580 if (I < NewFuncTy->getNumParams()) {
5581 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5582 // to the parameter type of the new function.
5583 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5584 NewFuncTy->getParamType(I))) {
5585 InvalidCast = true;
5586 break;
5587 }
5588 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5589 }
5590 Args.push_back(Arg);
5591 }
5592
5593 if (InvalidCast)
5594 continue;
5595
5596 // Create a call instruction that calls the new function.
5597 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5598 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5599 NewCall->takeName(CI);
5600
5601 // Bitcast the return value back to the type of the old call.
5602 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5603
5604 if (!CI->use_empty())
5605 CI->replaceAllUsesWith(NewRetVal);
5606 CI->eraseFromParent();
5607 }
5608
5609 if (Fn->use_empty())
5610 Fn->eraseFromParent();
5611 };
5612
5613 // Unconditionally convert a call to "clang.arc.use" to a call to
5614 // "llvm.objc.clang.arc.use".
5615 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5616
5617 // Upgrade the retain release marker. If there is no need to upgrade
5618 // the marker, that means either the module is already new enough to contain
 5619 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
 5620 if (!upgradeRetainReleaseMarker(M))
 5621 return;
5622
5623 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5624 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5625 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5626 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5627 {"objc_autoreleaseReturnValue",
5628 llvm::Intrinsic::objc_autoreleaseReturnValue},
5629 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5630 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5631 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5632 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5633 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5634 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5635 {"objc_release", llvm::Intrinsic::objc_release},
5636 {"objc_retain", llvm::Intrinsic::objc_retain},
5637 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5638 {"objc_retainAutoreleaseReturnValue",
5639 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5640 {"objc_retainAutoreleasedReturnValue",
5641 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5642 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5643 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5644 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5645 {"objc_unsafeClaimAutoreleasedReturnValue",
5646 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5647 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5648 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5649 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5650 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5651 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5652 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5653 {"objc_arc_annotation_topdown_bbstart",
5654 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5655 {"objc_arc_annotation_topdown_bbend",
5656 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5657 {"objc_arc_annotation_bottomup_bbstart",
5658 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5659 {"objc_arc_annotation_bottomup_bbend",
5660 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5661
5662 for (auto &I : RuntimeFuncs)
5663 UpgradeToIntrinsic(I.first, I.second);
5664}
5665
 5666bool llvm::UpgradeModuleFlags(Module &M) {
 5667 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5668 if (!ModFlags)
5669 return false;
5670
5671 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5672 bool HasSwiftVersionFlag = false;
5673 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5674 uint32_t SwiftABIVersion;
5675 auto Int8Ty = Type::getInt8Ty(M.getContext());
5676 auto Int32Ty = Type::getInt32Ty(M.getContext());
5677
5678 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5679 MDNode *Op = ModFlags->getOperand(I);
5680 if (Op->getNumOperands() != 3)
5681 continue;
5682 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5683 if (!ID)
5684 continue;
5685 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5686 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5687 Type::getInt32Ty(M.getContext()), B)),
5688 MDString::get(M.getContext(), ID->getString()),
5689 Op->getOperand(2)};
5690 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5691 Changed = true;
5692 };
5693
5694 if (ID->getString() == "Objective-C Image Info Version")
5695 HasObjCFlag = true;
5696 if (ID->getString() == "Objective-C Class Properties")
5697 HasClassProperties = true;
5698 // Upgrade PIC from Error/Max to Min.
5699 if (ID->getString() == "PIC Level") {
5700 if (auto *Behavior =
 5701 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
 5702 uint64_t V = Behavior->getLimitedValue();
5703 if (V == Module::Error || V == Module::Max)
5704 SetBehavior(Module::Min);
5705 }
5706 }
5707 // Upgrade "PIE Level" from Error to Max.
5708 if (ID->getString() == "PIE Level")
5709 if (auto *Behavior =
 5710 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
 5711 if (Behavior->getLimitedValue() == Module::Error)
5712 SetBehavior(Module::Max);
5713
5714 // Upgrade branch protection and return address signing module flags. The
5715 // module flag behavior for these fields were Error and now they are Min.
5716 if (ID->getString() == "branch-target-enforcement" ||
5717 ID->getString().starts_with("sign-return-address")) {
5718 if (auto *Behavior =
 5719 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
 5720 if (Behavior->getLimitedValue() == Module::Error) {
5721 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5722 Metadata *Ops[3] = {
5723 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5724 Op->getOperand(1), Op->getOperand(2)};
5725 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5726 Changed = true;
5727 }
5728 }
5729 }
5730
 5731 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
 5732 // section name so that llvm-lto will not complain about mismatching
 5733 // module flags that are functionally the same.
5734 if (ID->getString() == "Objective-C Image Info Section") {
5735 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5736 SmallVector<StringRef, 4> ValueComp;
5737 Value->getString().split(ValueComp, " ");
5738 if (ValueComp.size() != 1) {
5739 std::string NewValue;
5740 for (auto &S : ValueComp)
5741 NewValue += S.str();
5742 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5743 MDString::get(M.getContext(), NewValue)};
5744 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5745 Changed = true;
5746 }
5747 }
5748 }
5749
 5750 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
 5751 // value. If the higher bits are set, it adds a new module flag for Swift info.
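 // e.g. a (hypothetical) i32 value of 0x05010700 yields Swift Major 5,
 // Minor 1, ABI 7, and an i8 "Objective-C Garbage Collection" value of 0.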
5752 if (ID->getString() == "Objective-C Garbage Collection") {
5753 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5754 if (Md) {
5755 assert(Md->getValue() && "Expected non-empty metadata");
5756 auto Type = Md->getValue()->getType();
5757 if (Type == Int8Ty)
5758 continue;
5759 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5760 if ((Val & 0xff) != Val) {
5761 HasSwiftVersionFlag = true;
5762 SwiftABIVersion = (Val & 0xff00) >> 8;
5763 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5764 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5765 }
5766 Metadata *Ops[3] = {
 5767 Op->getOperand(0),
 5768 Op->getOperand(1),
5769 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5770 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5771 Changed = true;
5772 }
5773 }
5774
5775 if (ID->getString() == "amdgpu_code_object_version") {
5776 Metadata *Ops[3] = {
5777 Op->getOperand(0),
5778 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5779 Op->getOperand(2)};
5780 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5781 Changed = true;
5782 }
5783 }
5784
5785 // "Objective-C Class Properties" is recently added for Objective-C. We
5786 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5787 // flag of value 0, so we can correclty downgrade this flag when trying to
5788 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5789 // this module flag.
5790 if (HasObjCFlag && !HasClassProperties) {
5791 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5792 (uint32_t)0);
5793 Changed = true;
5794 }
5795
5796 if (HasSwiftVersionFlag) {
5797 M.addModuleFlag(Module::Error, "Swift ABI Version",
5798 SwiftABIVersion);
5799 M.addModuleFlag(Module::Error, "Swift Major Version",
5800 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5801 M.addModuleFlag(Module::Error, "Swift Minor Version",
5802 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5803 Changed = true;
5804 }
5805
5806 return Changed;
5807}
5808
 5809void llvm::UpgradeSectionAttributes(Module &M) {
 5810 auto TrimSpaces = [](StringRef Section) -> std::string {
5811 SmallVector<StringRef, 5> Components;
5812 Section.split(Components, ',');
5813
5814 SmallString<32> Buffer;
5815 raw_svector_ostream OS(Buffer);
5816
5817 for (auto Component : Components)
5818 OS << ',' << Component.trim();
5819
5820 return std::string(OS.str().substr(1));
5821 };
5822
5823 for (auto &GV : M.globals()) {
5824 if (!GV.hasSection())
5825 continue;
5826
5827 StringRef Section = GV.getSection();
5828
5829 if (!Section.starts_with("__DATA, __objc_catlist"))
5830 continue;
5831
5832 // __DATA, __objc_catlist, regular, no_dead_strip
5833 // __DATA,__objc_catlist,regular,no_dead_strip
5834 GV.setSection(TrimSpaces(Section));
5835 }
5836}
5837
5838namespace {
5839// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5840// callsites within a function that did not also have the strictfp attribute.
5841// Since 10.0, if strict FP semantics are needed within a function, the
5842// function must have the strictfp attribute and all calls within the function
5843// must also have the strictfp attribute. This latter restriction is
5844// necessary to prevent unwanted libcall simplification when a function is
5845// being cloned (such as for inlining).
5846//
5847// The "dangling" strictfp attribute usage was only used to prevent constant
5848// folding and other libcall simplification. The nobuiltin attribute on the
5849// callsite has the same effect.
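// Sketch: inside a caller that lacks the strictfp attribute,
//   call double @sin(double %x) strictfp
// is rewritten by the visitor below as
//   call double @sin(double %x) nobuiltin
// (attribute spelling abbreviated for illustration).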
5850struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5851 StrictFPUpgradeVisitor() = default;
5852
5853 void visitCallBase(CallBase &Call) {
5854 if (!Call.isStrictFP())
5855 return;
 5855 return;
 5856 if (isa<ConstrainedFPIntrinsic>(&Call))
 5857 return;
5858 // If we get here, the caller doesn't have the strictfp attribute
5859 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5860 Call.removeFnAttr(Attribute::StrictFP);
5861 Call.addFnAttr(Attribute::NoBuiltin);
5862 }
5863};
5864
5865/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5866struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5867 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5868 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5869
5870 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5871 if (!RMW.isFloatingPointOperation())
5872 return;
5873
5874 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5875 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5876 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5877 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5878 }
5879};
5880} // namespace
5881
 5882void llvm::UpgradeFunctionAttributes(Function &F) {
 5883 // If a function definition doesn't have the strictfp attribute,
5884 // convert any callsite strictfp attributes to nobuiltin.
5885 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5886 StrictFPUpgradeVisitor SFPV;
5887 SFPV.visit(F);
5888 }
5889
 5890 // Remove all incompatible attributes from the function.
5891 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5892 F.getReturnType(), F.getAttributes().getRetAttrs()));
5893 for (auto &Arg : F.args())
5894 Arg.removeAttrs(
5895 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5896
5897 // Older versions of LLVM treated an "implicit-section-name" attribute
5898 // similarly to directly setting the section on a Function.
5899 if (Attribute A = F.getFnAttribute("implicit-section-name");
5900 A.isValid() && A.isStringAttribute()) {
5901 F.setSection(A.getValueAsString());
5902 F.removeFnAttr("implicit-section-name");
5903 }
5904
5905 if (!F.empty()) {
5906 // For some reason this is called twice, and the first time is before any
5907 // instructions are loaded into the body.
5908
5909 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5910 A.isValid()) {
5911
5912 if (A.getValueAsBool()) {
5913 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5914 Visitor.visit(F);
5915 }
5916
5917 // We will leave behind dead attribute uses on external declarations, but
5918 // clang never added these to declarations anyway.
5919 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5920 }
5921 }
5922}
5923
5924static bool isOldLoopArgument(Metadata *MD) {
5925 auto *T = dyn_cast_or_null<MDTuple>(MD);
5926 if (!T)
5927 return false;
5928 if (T->getNumOperands() < 1)
5929 return false;
5930 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5931 if (!S)
5932 return false;
5933 return S->getString().starts_with("llvm.vectorizer.");
5934}
5935
5936static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5937 StringRef OldPrefix = "llvm.vectorizer.";
5938 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5939
5940 if (OldTag == "llvm.vectorizer.unroll")
5941 return MDString::get(C, "llvm.loop.interleave.count");
5942
5943 return MDString::get(
5944 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5945 .str());
5946}
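// For example: "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count",
// while any other old tag keeps its suffix, e.g. "llvm.vectorizer.width"
// becomes "llvm.loop.vectorize.width".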
5947
5948static Metadata *upgradeLoopArgument(Metadata *MD) {
5949 auto *T = dyn_cast_or_null<MDTuple>(MD);
5950 if (!T)
5951 return MD;
5952 if (T->getNumOperands() < 1)
5953 return MD;
5954 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5955 if (!OldTag)
5956 return MD;
5957 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5958 return MD;
5959
5960 // This has an old tag. Upgrade it.
5961 SmallVector<Metadata *, 8> Ops;
5962 Ops.reserve(T->getNumOperands());
5963 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5964 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5965 Ops.push_back(T->getOperand(I));
5966
5967 return MDTuple::get(T->getContext(), Ops);
5968}
5969
5970MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5971 auto *T = dyn_cast<MDTuple>(&N);
5972 if (!T)
5973 return &N;
5974
5975 if (none_of(T->operands(), isOldLoopArgument))
5976 return &N;
5977
5978 SmallVector<Metadata *, 8> Ops;
5979 Ops.reserve(T->getNumOperands());
5980 for (Metadata *MD : T->operands())
5981 Ops.push_back(upgradeLoopArgument(MD));
5982
5983 return MDTuple::get(T->getContext(), Ops);
5984}
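// Illustrative sketch (example metadata, for exposition): given a loop
// attachment
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.vectorizer.enable", i1 1}
// the function above rebuilds !0 with !1 upgraded to
//   !{!"llvm.loop.vectorize.enable", i1 1}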
5985
5986std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5987 Triple T(TT);
5988 // The only data layout upgrade needed for pre-GCN, SPIR or SPIRV is setting
5989 // the address space of globals to 1. This does not apply to SPIRV Logical.
5990 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5991 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5992 !DL.contains("-G") && !DL.starts_with("G")) {
5993 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5994 }
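 // For example: an empty layout string becomes "G1", and a hypothetical
 // "e-p:32:32" becomes "e-p:32:32-G1".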
5995
5996 if (T.isLoongArch64() || T.isRISCV64()) {
5997 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5998 auto I = DL.find("-n64-");
5999 if (I != StringRef::npos)
6000 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6001 return DL.str();
6002 }
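 // For example (hypothetical layout string): "e-m:e-i64:64-n64-S128" becomes
 // "e-m:e-i64:64-n32:64-S128" on these targets.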
6003
6004 std::string Res = DL.str();
6005 // AMDGCN data layout upgrades.
6006 if (T.isAMDGCN()) {
6007 // Define address spaces for constants.
6008 if (!DL.contains("-G") && !DL.starts_with("G"))
6009 Res.append(Res.empty() ? "G1" : "-G1");
6010
6011 // Add missing non-integral declarations.
6012 // This goes before adding new address spaces to prevent incoherent string
6013 // values.
6014 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6015 Res.append("-ni:7:8:9");
6016 // Update ni:7 to ni:7:8:9.
6017 if (DL.ends_with("ni:7"))
6018 Res.append(":8:9");
6019 if (DL.ends_with("ni:7:8"))
6020 Res.append(":9");
6021
6022 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6023 // resources). An empty data layout has already been upgraded to G1 by now.
6024 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6025 Res.append("-p7:160:256:256:32");
6026 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6027 Res.append("-p8:128:128:128:48");
6028 constexpr StringRef OldP8("-p8:128:128-");
6029 if (DL.contains(OldP8))
6030 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6031 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6032 Res.append("-p9:192:256:256:32");
6033
6034 return Res;
6035 }
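 // For example (hypothetical layout string): a layout ending in "-ni:7" gains
 // ":8:9", and missing p7/p8/p9 entries are appended as
 // "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32".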
6036
6037 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6038 // If the datalayout matches the expected format, add pointer size address
6039 // spaces to the datalayout.
6040 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6041 if (!DL.contains(AddrSpaces)) {
6042 SmallVector<StringRef, 4> Groups;
6043 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6044 if (R.match(Res, &Groups))
6045 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6046 }
6047 };
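 // For example (hypothetical layout string): "e-m:e-i64:64-n8:16:32:64"
 // becomes "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32:64".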
6048
6049 // AArch64 data layout upgrades.
6050 if (T.isAArch64()) {
6051 // Add "-Fn32"
6052 if (!DL.empty() && !DL.contains("-Fn32"))
6053 Res.append("-Fn32");
6054 AddPtr32Ptr64AddrSpaces();
6055 return Res;
6056 }
6057
6058 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6059 T.isWasm()) {
6060 // Mips64 with the o32 ABI did not add "-i128:128".
6061 // Add "-i128:128" after "-i64:64" if it is missing.
6062 std::string I64 = "-i64:64";
6063 std::string I128 = "-i128:128";
6064 if (!StringRef(Res).contains(I128)) {
6065 size_t Pos = Res.find(I64);
6066 if (Pos != size_t(-1))
6067 Res.insert(Pos + I64.size(), I128);
6068 }
6069 return Res;
6070 }
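 // For example (hypothetical layout string): "E-m:e-i64:64-n32:64" becomes
 // "E-m:e-i64:64-i128:128-n32:64".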
6071
6072 if (!T.isX86())
6073 return Res;
6074
6075 AddPtr32Ptr64AddrSpaces();
6076
6077 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6078 // for i128 operations prior to this being reflected in the data layout, and
6079 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6080 // boundaries, so although this is a breaking change, the upgrade is expected
6081 // to fix more IR than it breaks.
6082 // Intel MCU is an exception and uses 4-byte-alignment.
6083 if (!T.isOSIAMCU()) {
6084 std::string I128 = "-i128:128";
6085 if (StringRef Ref = Res; !Ref.contains(I128)) {
6086 SmallVector<StringRef, 4> Groups;
6087 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6088 if (R.match(Res, &Groups))
6089 Res = (Groups[1] + I128 + Groups[3]).str();
6090 }
6091 }
6092
6093 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6094 // Raising the alignment is safe because Clang did not produce f80 values in
6095 // the MSVC environment before this upgrade was added.
6096 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6097 StringRef Ref = Res;
6098 auto I = Ref.find("-f80:32-");
6099 if (I != StringRef::npos)
6100 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6101 }
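 // For example: a 32-bit MSVC layout containing "-i64:64-f80:32-" ends up
 // with "-i64:64-i128:128-f80:128-" after the two fixups above.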
6102
6103 return Res;
6104}
6105
6106void llvm::UpgradeAttributes(AttrBuilder &B) {
6107 StringRef FramePointer;
6108 Attribute A = B.getAttribute("no-frame-pointer-elim");
6109 if (A.isValid()) {
6110 // The value can be "true" or "false".
6111 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6112 B.removeAttribute("no-frame-pointer-elim");
6113 }
6114 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6115 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6116 if (FramePointer != "all")
6117 FramePointer = "non-leaf";
6118 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6119 }
6120 if (!FramePointer.empty())
6121 B.addAttribute("frame-pointer", FramePointer);
6122
6123 A = B.getAttribute("null-pointer-is-valid");
6124 if (A.isValid()) {
6125 // The value can be "true" or "false".
6126 bool NullPointerIsValid = A.getValueAsString() == "true";
6127 B.removeAttribute("null-pointer-is-valid");
6128 if (NullPointerIsValid)
6129 B.addAttribute(Attribute::NullPointerIsValid);
6130 }
6131}
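// For example: "no-frame-pointer-elim"="true" becomes "frame-pointer"="all",
// "no-frame-pointer-elim-non-leaf" alone becomes "frame-pointer"="non-leaf",
// and "null-pointer-is-valid"="true" becomes Attribute::NullPointerIsValid.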
6132
6133void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6134 // clang.arc.attachedcall bundles are now required to have an operand.
6135 // If they don't, it's okay to drop them entirely: when there is an operand,
6136 // the "attachedcall" is meaningful and required, but without an operand,
6137 // it's just a marker NOP. Dropping it merely prevents an optimization.
6138 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6139 return OBD.getTag() == "clang.arc.attachedcall" &&
6140 OBD.inputs().empty();
6141 });
6142}
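// For example (illustrative bundles): [ "clang.arc.attachedcall"() ] is
// dropped, while a bundle with an operand such as
// [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
// is kept.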