//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

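// For illustration (an IR sketch, not part of the upgrade logic): after
// upgradePTESTIntrinsic runs, an old declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and a fresh declaration
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// is inserted; the old calls are rewritten later by the call-upgrade path.
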
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

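// Illustrative sketch (assuming the pre-3.6 signature): the final operand of
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// shrinks to i8 in the upgraded declaration, matching the instruction's
// 8-bit immediate.
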
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

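// Illustrative sketch: llvm.x86.avx512.mask.cmp.ps.512 used to return a
// scalar i16 bitmask; the upgraded intrinsic returns <16 x i1> instead, one
// mask bit per lane.
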
// Upgrade the declarations of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check whether the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

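// Illustrative sketch: for the 128-bit variant, the two multiplicand operands
// of llvm.x86.avx512.vpdpbusd.128 change from <4 x i32> to <16 x i8>; the
// accumulator operand and the result remain <4 x i32>.
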
// Upgrade the declarations of AVX512 BF16 intrinsics whose results were
// previously represented as vectors of i16 rather than vectors of bfloat.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Likewise for the BF16 dot-product intrinsics, whose second argument was
// previously a vector of i16 rather than a vector of bfloat.
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

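// Illustrative sketch of both BF16 upgrades: the old declaration
//   declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)
// becomes
//   declare <8 x bfloat> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)
// once bfloat is used as the element type.
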
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the llvm version
  // that started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic functions. Returns true
// iff an upgrade applied. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

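  // Illustrative sketch of the bfdot change above: a pre-12.0 declaration like
  //   declare <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float>, <16 x i8>, <16 x i8>)
  // is re-declared with bfloat operands (<8 x bfloat> for the 128-bit form).
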
  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

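      // Illustrative note: this maps e.g. 'aarch64.sve.ld2.nxv4i32' onto the
      // struct-returning form, Intrinsic::aarch64_sve_ld2_sret, overloaded on
      // the single-vector type derived above.
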
      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

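// Illustrative sketch of the tuple upgrade above: 'aarch64.sve.tuple.get*'
// becomes the generic llvm.vector.extract intrinsic, while 'tuple.set*' and
// 'tuple.create*' become llvm.vector.insert, with the overload types taken
// from the old signature.
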
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

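// Usage note (illustrative): for a name like "shared.to.gen", this consumes
// the leading address-space token and leaves ".to.gen" in Name, which the
// 'nvvm.ptr.*' handling below then checks with starts_with(".to.gen").
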
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
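  // Illustrative sketch of the memcpy/memmove/memset change: the old
  // 5-operand form
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
  // becomes a 4-operand call with the alignment carried as parameter
  // attributes (e.g. 'align 4') on the pointer arguments.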
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
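  // Illustrative sketch of one such expansion (performed later, in
  // UpgradeIntrinsicCall): a call to llvm.nvvm.bitcast.f2i is rewritten as a
  // plain 'bitcast float %x to i32' instruction rather than any intrinsic.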
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 't':
    if (Name == "thread.pointer") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
      return true;
    }
    break;
  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

1636 auto *ST = dyn_cast<StructType>(F->getReturnType());
1637 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1638 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1639 // Replace return type with literal non-packed struct. Only do this for
1640 // intrinsics declared to return a struct, not for intrinsics with
1641 // overloaded return type, in which case the exact struct type will be
1642 // mangled into the name.
1643 SmallVector<Intrinsic::IITDescriptor> Desc;
1644 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1645 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1646 auto *FT = F->getFunctionType();
1647 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1648 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1649 std::string Name = F->getName().str();
1650 rename(F);
1651 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1652 Name, F->getParent());
1653
1654 // The new function may also need remangling.
1655 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1656 NewFn = *Result;
1657 return true;
1658 }
1659 }
1660
1661 // Remangle our intrinsic since we upgrade the mangling
1662 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1663 if (Result != std::nullopt) {
1664 NewFn = *Result;
1665 return true;
1666 }
1667
1668 // This may not belong here. This function is effectively being overloaded
1669 // to both detect an intrinsic which needs upgrading, and to provide the
1670 // upgraded form of the intrinsic. We should perhaps have two separate
1671 // functions for this.
1672 return false;
1673}
1674
1675 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1676 bool CanUpgradeDebugIntrinsicsToRecords) {
1677 NewFn = nullptr;
1678 bool Upgraded =
1679 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1680
1681 // Upgrade intrinsic attributes. This does not change the function.
1682 if (NewFn)
1683 F = NewFn;
1684 if (Intrinsic::ID id = F->getIntrinsicID()) {
1685 // Only do this if the intrinsic signature is valid.
1686 SmallVector<Type *> OverloadTys;
1687 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1688 F->setAttributes(
1689 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1690 }
1691 return Upgraded;
1692}
1693
1694 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1695 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1696 GV->getName() == "llvm.global_dtors")) ||
1697 !GV->hasInitializer())
1698 return nullptr;
1699 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1700 if (!ATy)
1701 return nullptr;
1702 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1703 if (!STy || STy->getNumElements() != 2)
1704 return nullptr;
1705
1706 LLVMContext &C = GV->getContext();
1707 IRBuilder<> IRB(C);
1708 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1709 IRB.getPtrTy());
1710 Constant *Init = GV->getInitializer();
1711 unsigned N = Init->getNumOperands();
1712 std::vector<Constant *> NewCtors(N);
1713 for (unsigned i = 0; i != N; ++i) {
1714 auto Ctor = cast<Constant>(Init->getOperand(i));
1715 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1716 Ctor->getAggregateElement(1),
1717 Constant::getNullValue(IRB.getPtrTy()));
1718 }
1719 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1720
1721 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1722 NewInit, GV->getName());
1723}
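// A sketch of the upgrade performed above (symbol names illustrative, not
// from the source): a two-field entry such as
//   @llvm.global_ctors = appending global [1 x { i32, ptr }]
//                          [{ i32, ptr } { i32 65535, ptr @ctor }]
// is rebuilt with the current three-field element type, padding each entry
// with a null associated-data pointer:
//   @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }]
//                          [{ i32, ptr, ptr } { i32 65535, ptr @ctor, ptr null }]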
1724
1725// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1726// to byte shuffles.
1727 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1728 unsigned Shift) {
1729 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1730 unsigned NumElts = ResultTy->getNumElements() * 8;
1731
1732 // Bitcast from a 64-bit element type to a byte element type.
1733 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1734 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1735
1736 // We'll be shuffling in zeroes.
1737 Value *Res = Constant::getNullValue(VecTy);
1738
1739 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1740 // we'll just return the zero vector.
1741 if (Shift < 16) {
1742 int Idxs[64];
1743 // 256/512-bit version is split into 2/4 16-byte lanes.
1744 for (unsigned l = 0; l != NumElts; l += 16)
1745 for (unsigned i = 0; i != 16; ++i) {
1746 unsigned Idx = NumElts + i - Shift;
1747 if (Idx < NumElts)
1748 Idx -= NumElts - 16; // end of lane, switch operand.
1749 Idxs[l + i] = Idx + l;
1750 }
1751
1752 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1753 }
1754
1755 // Bitcast back to a 64-bit element type.
1756 return Builder.CreateBitCast(Res, ResultTy, "cast");
1757}
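// Worked example of the index math above, assuming NumElts == 16 (a 128-bit
// vector) and Shift == 4: the loop produces indices <12..15, 16..27>, i.e.
// four zeros from the first shuffle operand followed by bytes 0..11 of Op.
// In IR this is roughly:
//   %r = shufflevector <16 x i8> zeroinitializer, <16 x i8> %op,
//        <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, ..., i32 27>
// which matches PSLLDQ's shift-left-by-4-bytes semantics; PSRLDQ below is
// the mirror image.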
1758
1759// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1760// to byte shuffles.
1761 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1762 unsigned Shift) {
1763 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1764 unsigned NumElts = ResultTy->getNumElements() * 8;
1765
1766 // Bitcast from a 64-bit element type to a byte element type.
1767 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1768 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1769
1770 // We'll be shuffling in zeroes.
1771 Value *Res = Constant::getNullValue(VecTy);
1772
1773 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1774 // we'll just return the zero vector.
1775 if (Shift < 16) {
1776 int Idxs[64];
1777 // 256/512-bit version is split into 2/4 16-byte lanes.
1778 for (unsigned l = 0; l != NumElts; l += 16)
1779 for (unsigned i = 0; i != 16; ++i) {
1780 unsigned Idx = i + Shift;
1781 if (Idx >= 16)
1782 Idx += NumElts - 16; // end of lane, switch operand.
1783 Idxs[l + i] = Idx + l;
1784 }
1785
1786 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1787 }
1788
1789 // Bitcast back to a 64-bit element type.
1790 return Builder.CreateBitCast(Res, ResultTy, "cast");
1791}
1792
1793static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1794 unsigned NumElts) {
1795 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1796 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1797 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1798 Mask = Builder.CreateBitCast(Mask, MaskTy);
1799
1800 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1801 // i8 and we need to extract down to the right number of elements.
1802 if (NumElts <= 4) {
1803 int Indices[4];
1804 for (unsigned i = 0; i != NumElts; ++i)
1805 Indices[i] = i;
1806 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1807 "extract");
1808 }
1809
1810 return Mask;
1811}
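// For example, an i8 mask driving a 4-element operation becomes (sketch):
//   %v = bitcast i8 %mask to <8 x i1>
//   %m = shufflevector <8 x i1> %v, <8 x i1> %v,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// so only the low NumElts bits of the integer mask survive.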
1812
1813static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1814 Value *Op1) {
1815 // If the mask is all ones just emit the first operation.
1816 if (const auto *C = dyn_cast<Constant>(Mask))
1817 if (C->isAllOnesValue())
1818 return Op0;
1819
1820 Mask = getX86MaskVec(Builder, Mask,
1821 cast<FixedVectorType>(Op0->getType())->getNumElements());
1822 return Builder.CreateSelect(Mask, Op0, Op1);
1823}
1824
1825static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1826 Value *Op1) {
1827 // If the mask is all ones just emit the first operation.
1828 if (const auto *C = dyn_cast<Constant>(Mask))
1829 if (C->isAllOnesValue())
1830 return Op0;
1831
1832 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1833 Mask->getType()->getIntegerBitWidth());
1834 Mask = Builder.CreateBitCast(Mask, MaskTy);
1835 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1836 return Builder.CreateSelect(Mask, Op0, Op1);
1837}
1838
1839// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1840// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1841// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1842 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1843 Value *Op1, Value *Shift,
1844 Value *Passthru, Value *Mask,
1845 bool IsVALIGN) {
1846 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1847
1848 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1849 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1850 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1851 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1852
1853 // Mask the immediate for VALIGN.
1854 if (IsVALIGN)
1855 ShiftVal &= (NumElts - 1);
1856
1857 // If palignr is shifting the pair of vectors more than the size of two
1858 // lanes, emit zero.
1859 if (ShiftVal >= 32)
1860 return llvm::Constant::getNullValue(Op0->getType());
1861
1862 // If palignr is shifting the pair of input vectors more than one lane,
1863 // but less than two lanes, convert to shifting in zeroes.
1864 if (ShiftVal > 16) {
1865 ShiftVal -= 16;
1866 Op1 = Op0;
1867 Op0 = llvm::Constant::getNullValue(Op0->getType());
1868 }
1869
1870 int Indices[64];
1871 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1872 for (unsigned l = 0; l < NumElts; l += 16) {
1873 for (unsigned i = 0; i != 16; ++i) {
1874 unsigned Idx = ShiftVal + i;
1875 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1876 Idx += NumElts - 16; // End of lane, switch operand.
1877 Indices[l + i] = Idx + l;
1878 }
1879 }
1880
1881 Value *Align = Builder.CreateShuffleVector(
1882 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1883
1884 return emitX86Select(Builder, Mask, Align, Passthru);
1885}
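// Example for the 128-bit PALIGNR case with ShiftVal == 4: the shuffle of
// (Op1, Op0) uses indices <4..15, 16..19>, i.e. bytes 4..15 of Op1 followed
// by bytes 0..3 of Op0, the same result as reading 16 bytes starting at
// byte 4 of the Op0:Op1 concatenation.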
1886
1887 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1888 bool ZeroMask, bool IndexForm) {
1889 Type *Ty = CI.getType();
1890 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1891 unsigned EltWidth = Ty->getScalarSizeInBits();
1892 bool IsFloat = Ty->isFPOrFPVectorTy();
1893 Intrinsic::ID IID;
1894 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1895 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1896 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1897 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1898 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1899 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1900 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1901 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1902 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1903 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1904 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1905 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1906 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1907 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1908 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1909 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1910 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1911 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1912 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1913 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1914 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1915 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1916 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1917 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1918 else if (VecWidth == 128 && EltWidth == 16)
1919 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1920 else if (VecWidth == 256 && EltWidth == 16)
1921 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1922 else if (VecWidth == 512 && EltWidth == 16)
1923 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1924 else if (VecWidth == 128 && EltWidth == 8)
1925 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1926 else if (VecWidth == 256 && EltWidth == 8)
1927 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1928 else if (VecWidth == 512 && EltWidth == 8)
1929 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1930 else
1931 llvm_unreachable("Unexpected intrinsic");
1932
1933 Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
1934 CI.getArgOperand(2) };
1935
1936 // If this isn't index form we need to swap operand 0 and 1.
1937 if (!IndexForm)
1938 std::swap(Args[0], Args[1]);
1939
1940 Value *V = Builder.CreateIntrinsic(IID, Args);
1941 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1942 : Builder.CreateBitCast(CI.getArgOperand(1),
1943 Ty);
1944 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1945}
1946
1947 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1948 Intrinsic::ID IID) {
1949 Type *Ty = CI.getType();
1950 Value *Op0 = CI.getOperand(0);
1951 Value *Op1 = CI.getOperand(1);
1952 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1953
1954 if (CI.arg_size() == 4) { // For masked intrinsics.
1955 Value *VecSrc = CI.getOperand(2);
1956 Value *Mask = CI.getOperand(3);
1957 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1958 }
1959 return Res;
1960}
1961
1962 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1963 bool IsRotateRight) {
1964 Type *Ty = CI.getType();
1965 Value *Src = CI.getArgOperand(0);
1966 Value *Amt = CI.getArgOperand(1);
1967
1968 // Amount may be a scalar immediate, in which case create a splat vector.
1969 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1970 // we only care about the lowest log2 bits anyway.
1971 if (Amt->getType() != Ty) {
1972 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1973 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1974 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1975 }
1976
1977 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1978 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1979
1980 if (CI.arg_size() == 4) { // For masked intrinsics.
1981 Value *VecSrc = CI.getOperand(2);
1982 Value *Mask = CI.getOperand(3);
1983 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1984 }
1985 return Res;
1986}
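// e.g. a v4i32 rotate-left by the immediate 5 becomes (sketch):
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                        <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
// followed, for the masked forms, by the usual select against the passthru.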
1987
1988static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1989 bool IsSigned) {
1990 Type *Ty = CI.getType();
1991 Value *LHS = CI.getArgOperand(0);
1992 Value *RHS = CI.getArgOperand(1);
1993
1994 CmpInst::Predicate Pred;
1995 switch (Imm) {
1996 case 0x0:
1997 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1998 break;
1999 case 0x1:
2000 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2001 break;
2002 case 0x2:
2003 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2004 break;
2005 case 0x3:
2006 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2007 break;
2008 case 0x4:
2009 Pred = ICmpInst::ICMP_EQ;
2010 break;
2011 case 0x5:
2012 Pred = ICmpInst::ICMP_NE;
2013 break;
2014 case 0x6:
2015 return Constant::getNullValue(Ty); // FALSE
2016 case 0x7:
2017 return Constant::getAllOnesValue(Ty); // TRUE
2018 default:
2019 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2020 }
2021
2022 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2023 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2024 return Ext;
2025}
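// For instance, vpcomltb (Imm == 0x0, signed) lowers to (sketch):
//   %c = icmp slt <16 x i8> %a, %b
//   %r = sext <16 x i1> %c to <16 x i8>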
2026
2027 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2028 bool IsShiftRight, bool ZeroMask) {
2029 Type *Ty = CI.getType();
2030 Value *Op0 = CI.getArgOperand(0);
2031 Value *Op1 = CI.getArgOperand(1);
2032 Value *Amt = CI.getArgOperand(2);
2033
2034 if (IsShiftRight)
2035 std::swap(Op0, Op1);
2036
2037 // Amount may be a scalar immediate, in which case create a splat vector.
2038 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2039 // we only care about the lowest log2 bits anyway.
2040 if (Amt->getType() != Ty) {
2041 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2042 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2043 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2044 }
2045
2046 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2047 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2048
2049 unsigned NumArgs = CI.arg_size();
2050 if (NumArgs >= 4) { // For masked intrinsics.
2051 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2052 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2053 CI.getArgOperand(0);
2054 Value *Mask = CI.getOperand(NumArgs - 1);
2055 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2056 }
2057 return Res;
2058}
2059
2060 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2061 Value *Mask, bool Aligned) {
2062 const Align Alignment =
2063 Aligned
2064 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2065 : Align(1);
2066
2067 // If the mask is all ones just emit a regular store.
2068 if (const auto *C = dyn_cast<Constant>(Mask))
2069 if (C->isAllOnesValue())
2070 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2071
2072 // Convert the mask from an integer type to a vector of i1.
2073 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2074 Mask = getX86MaskVec(Builder, Mask, NumElts);
2075 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2076}
2077
2078 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2079 Value *Passthru, Value *Mask, bool Aligned) {
2080 Type *ValTy = Passthru->getType();
2081 const Align Alignment =
2082 Aligned
2083 ? Align(
2084 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2085 8)
2086 : Align(1);
2087
2088 // If the mask is all ones just emit a regular load.
2089 if (const auto *C = dyn_cast<Constant>(Mask))
2090 if (C->isAllOnesValue())
2091 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2092
2093 // Convert the mask from an integer type to a vector of i1.
2094 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2095 Mask = getX86MaskVec(Builder, Mask, NumElts);
2096 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2097}
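// An aligned v4i32 masked load with a non-constant mask ends up as (sketch):
//   %m = <4 x i1> mask extracted from the i8 argument via getX86MaskVec
//   %r = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 16,
//                         <4 x i1> %m, <4 x i32> %passthru)
// with alignment 16 (the vector size in bytes) when Aligned, and 1 otherwise.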
2098
2099static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2100 Type *Ty = CI.getType();
2101 Value *Op0 = CI.getArgOperand(0);
2102 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2103 {Op0, Builder.getInt1(false)});
2104 if (CI.arg_size() == 3)
2105 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2106 return Res;
2107}
2108
2109static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2110 Type *Ty = CI.getType();
2111
2112 // Arguments have a vXi32 type so cast to vXi64.
2113 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2114 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2115
2116 if (IsSigned) {
2117 // Shift left then arithmetic shift right.
2118 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2119 LHS = Builder.CreateShl(LHS, ShiftAmt);
2120 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2121 RHS = Builder.CreateShl(RHS, ShiftAmt);
2122 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2123 } else {
2124 // Clear the upper bits.
2125 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2126 LHS = Builder.CreateAnd(LHS, Mask);
2127 RHS = Builder.CreateAnd(RHS, Mask);
2128 }
2129
2130 Value *Res = Builder.CreateMul(LHS, RHS);
2131
2132 if (CI.arg_size() == 4)
2133 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2134
2135 return Res;
2136}
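// Worked example for the signed case on v4i32 inputs (sketch):
//   %a64 = bitcast <4 x i32> %a to <2 x i64>
//   %t   = shl <2 x i64> %a64, <i64 32, i64 32>
//   %as  = ashr <2 x i64> %t, <i64 32, i64 32>   ; sign-extended even elements
//   (likewise for %b), then:
//   %r   = mul <2 x i64> %as, %bs
// The unsigned case masks with 0xffffffff instead of the shl/ashr pair.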
2137
2138 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2139 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2140 Value *Mask) {
2141 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2142 if (Mask) {
2143 const auto *C = dyn_cast<Constant>(Mask);
2144 if (!C || !C->isAllOnesValue())
2145 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2146 }
2147
2148 if (NumElts < 8) {
2149 int Indices[8];
2150 for (unsigned i = 0; i != NumElts; ++i)
2151 Indices[i] = i;
2152 for (unsigned i = NumElts; i != 8; ++i)
2153 Indices[i] = NumElts + i % NumElts;
2154 Vec = Builder.CreateShuffleVector(Vec,
2155 Constant::getNullValue(Vec->getType()),
2156 Indices);
2157 }
2158 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2159}
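// For a 2-element input the widening step looks like (sketch):
//   %p = shufflevector <2 x i1> %vec, <2 x i1> zeroinitializer,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
//   %r = bitcast <8 x i1> %p to i8
// Indices >= NumElts select from the zero vector, so the padding bits are 0.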
2160
2161 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2162 unsigned CC, bool Signed) {
2163 Value *Op0 = CI.getArgOperand(0);
2164 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2165
2166 Value *Cmp;
2167 if (CC == 3) {
2168 Cmp = Constant::getNullValue(
2169 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2170 } else if (CC == 7) {
2171 Cmp = Constant::getAllOnesValue(
2172 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2173 } else {
2174 ICmpInst::Predicate Pred;
2175 switch (CC) {
2176 default: llvm_unreachable("Unknown condition code");
2177 case 0: Pred = ICmpInst::ICMP_EQ; break;
2178 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2179 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2180 case 4: Pred = ICmpInst::ICMP_NE; break;
2181 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2182 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2183 }
2184 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2185 }
2186
2187 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2188
2189 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2190}
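// e.g. llvm.x86.avx512.mask.pcmpgt.* is routed here with CC == 6 and
// Signed == true, producing (sketch):
//   %c = icmp sgt <4 x i32> %a, %b
// which applyX86MaskOn1BitsVec then ANDs with the k-mask and widens to at
// least 8 bits.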
2191
2192// Replace a masked intrinsic with an older unmasked intrinsic.
2193 static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2194 Intrinsic::ID IID) {
2195 Value *Rep =
2196 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2197 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2198}
2199
2200 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2201 Value* A = CI.getArgOperand(0);
2202 Value* B = CI.getArgOperand(1);
2203 Value* Src = CI.getArgOperand(2);
2204 Value* Mask = CI.getArgOperand(3);
2205
2206 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2207 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2208 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2209 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2210 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2211 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2212}
2213
2214 static Value* upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2215 Value* Op = CI.getArgOperand(0);
2216 Type* ReturnOp = CI.getType();
2217 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2218 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2219 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2220}
2221
2222// Replace intrinsic with unmasked version and a select.
2223 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2224 CallBase &CI, Value *&Rep) {
2225 Name = Name.substr(12); // Remove avx512.mask.
2226
2227 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2228 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2229 Intrinsic::ID IID;
2230 if (Name.starts_with("max.p")) {
2231 if (VecWidth == 128 && EltWidth == 32)
2232 IID = Intrinsic::x86_sse_max_ps;
2233 else if (VecWidth == 128 && EltWidth == 64)
2234 IID = Intrinsic::x86_sse2_max_pd;
2235 else if (VecWidth == 256 && EltWidth == 32)
2236 IID = Intrinsic::x86_avx_max_ps_256;
2237 else if (VecWidth == 256 && EltWidth == 64)
2238 IID = Intrinsic::x86_avx_max_pd_256;
2239 else
2240 llvm_unreachable("Unexpected intrinsic");
2241 } else if (Name.starts_with("min.p")) {
2242 if (VecWidth == 128 && EltWidth == 32)
2243 IID = Intrinsic::x86_sse_min_ps;
2244 else if (VecWidth == 128 && EltWidth == 64)
2245 IID = Intrinsic::x86_sse2_min_pd;
2246 else if (VecWidth == 256 && EltWidth == 32)
2247 IID = Intrinsic::x86_avx_min_ps_256;
2248 else if (VecWidth == 256 && EltWidth == 64)
2249 IID = Intrinsic::x86_avx_min_pd_256;
2250 else
2251 llvm_unreachable("Unexpected intrinsic");
2252 } else if (Name.starts_with("pshuf.b.")) {
2253 if (VecWidth == 128)
2254 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2255 else if (VecWidth == 256)
2256 IID = Intrinsic::x86_avx2_pshuf_b;
2257 else if (VecWidth == 512)
2258 IID = Intrinsic::x86_avx512_pshuf_b_512;
2259 else
2260 llvm_unreachable("Unexpected intrinsic");
2261 } else if (Name.starts_with("pmul.hr.sw.")) {
2262 if (VecWidth == 128)
2263 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2264 else if (VecWidth == 256)
2265 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2266 else if (VecWidth == 512)
2267 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2268 else
2269 llvm_unreachable("Unexpected intrinsic");
2270 } else if (Name.starts_with("pmulh.w.")) {
2271 if (VecWidth == 128)
2272 IID = Intrinsic::x86_sse2_pmulh_w;
2273 else if (VecWidth == 256)
2274 IID = Intrinsic::x86_avx2_pmulh_w;
2275 else if (VecWidth == 512)
2276 IID = Intrinsic::x86_avx512_pmulh_w_512;
2277 else
2278 llvm_unreachable("Unexpected intrinsic");
2279 } else if (Name.starts_with("pmulhu.w.")) {
2280 if (VecWidth == 128)
2281 IID = Intrinsic::x86_sse2_pmulhu_w;
2282 else if (VecWidth == 256)
2283 IID = Intrinsic::x86_avx2_pmulhu_w;
2284 else if (VecWidth == 512)
2285 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2286 else
2287 llvm_unreachable("Unexpected intrinsic");
2288 } else if (Name.starts_with("pmaddw.d.")) {
2289 if (VecWidth == 128)
2290 IID = Intrinsic::x86_sse2_pmadd_wd;
2291 else if (VecWidth == 256)
2292 IID = Intrinsic::x86_avx2_pmadd_wd;
2293 else if (VecWidth == 512)
2294 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2295 else
2296 llvm_unreachable("Unexpected intrinsic");
2297 } else if (Name.starts_with("pmaddubs.w.")) {
2298 if (VecWidth == 128)
2299 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2300 else if (VecWidth == 256)
2301 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2302 else if (VecWidth == 512)
2303 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2304 else
2305 llvm_unreachable("Unexpected intrinsic");
2306 } else if (Name.starts_with("packsswb.")) {
2307 if (VecWidth == 128)
2308 IID = Intrinsic::x86_sse2_packsswb_128;
2309 else if (VecWidth == 256)
2310 IID = Intrinsic::x86_avx2_packsswb;
2311 else if (VecWidth == 512)
2312 IID = Intrinsic::x86_avx512_packsswb_512;
2313 else
2314 llvm_unreachable("Unexpected intrinsic");
2315 } else if (Name.starts_with("packssdw.")) {
2316 if (VecWidth == 128)
2317 IID = Intrinsic::x86_sse2_packssdw_128;
2318 else if (VecWidth == 256)
2319 IID = Intrinsic::x86_avx2_packssdw;
2320 else if (VecWidth == 512)
2321 IID = Intrinsic::x86_avx512_packssdw_512;
2322 else
2323 llvm_unreachable("Unexpected intrinsic");
2324 } else if (Name.starts_with("packuswb.")) {
2325 if (VecWidth == 128)
2326 IID = Intrinsic::x86_sse2_packuswb_128;
2327 else if (VecWidth == 256)
2328 IID = Intrinsic::x86_avx2_packuswb;
2329 else if (VecWidth == 512)
2330 IID = Intrinsic::x86_avx512_packuswb_512;
2331 else
2332 llvm_unreachable("Unexpected intrinsic");
2333 } else if (Name.starts_with("packusdw.")) {
2334 if (VecWidth == 128)
2335 IID = Intrinsic::x86_sse41_packusdw;
2336 else if (VecWidth == 256)
2337 IID = Intrinsic::x86_avx2_packusdw;
2338 else if (VecWidth == 512)
2339 IID = Intrinsic::x86_avx512_packusdw_512;
2340 else
2341 llvm_unreachable("Unexpected intrinsic");
2342 } else if (Name.starts_with("vpermilvar.")) {
2343 if (VecWidth == 128 && EltWidth == 32)
2344 IID = Intrinsic::x86_avx_vpermilvar_ps;
2345 else if (VecWidth == 128 && EltWidth == 64)
2346 IID = Intrinsic::x86_avx_vpermilvar_pd;
2347 else if (VecWidth == 256 && EltWidth == 32)
2348 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2349 else if (VecWidth == 256 && EltWidth == 64)
2350 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2351 else if (VecWidth == 512 && EltWidth == 32)
2352 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2353 else if (VecWidth == 512 && EltWidth == 64)
2354 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2355 else
2356 llvm_unreachable("Unexpected intrinsic");
2357 } else if (Name == "cvtpd2dq.256") {
2358 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2359 } else if (Name == "cvtpd2ps.256") {
2360 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2361 } else if (Name == "cvttpd2dq.256") {
2362 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2363 } else if (Name == "cvttps2dq.128") {
2364 IID = Intrinsic::x86_sse2_cvttps2dq;
2365 } else if (Name == "cvttps2dq.256") {
2366 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2367 } else if (Name.starts_with("permvar.")) {
2368 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2369 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2370 IID = Intrinsic::x86_avx2_permps;
2371 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2372 IID = Intrinsic::x86_avx2_permd;
2373 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2374 IID = Intrinsic::x86_avx512_permvar_df_256;
2375 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2376 IID = Intrinsic::x86_avx512_permvar_di_256;
2377 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2378 IID = Intrinsic::x86_avx512_permvar_sf_512;
2379 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2380 IID = Intrinsic::x86_avx512_permvar_si_512;
2381 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2382 IID = Intrinsic::x86_avx512_permvar_df_512;
2383 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2384 IID = Intrinsic::x86_avx512_permvar_di_512;
2385 else if (VecWidth == 128 && EltWidth == 16)
2386 IID = Intrinsic::x86_avx512_permvar_hi_128;
2387 else if (VecWidth == 256 && EltWidth == 16)
2388 IID = Intrinsic::x86_avx512_permvar_hi_256;
2389 else if (VecWidth == 512 && EltWidth == 16)
2390 IID = Intrinsic::x86_avx512_permvar_hi_512;
2391 else if (VecWidth == 128 && EltWidth == 8)
2392 IID = Intrinsic::x86_avx512_permvar_qi_128;
2393 else if (VecWidth == 256 && EltWidth == 8)
2394 IID = Intrinsic::x86_avx512_permvar_qi_256;
2395 else if (VecWidth == 512 && EltWidth == 8)
2396 IID = Intrinsic::x86_avx512_permvar_qi_512;
2397 else
2398 llvm_unreachable("Unexpected intrinsic");
2399 } else if (Name.starts_with("dbpsadbw.")) {
2400 if (VecWidth == 128)
2401 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2402 else if (VecWidth == 256)
2403 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2404 else if (VecWidth == 512)
2405 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2406 else
2407 llvm_unreachable("Unexpected intrinsic");
2408 } else if (Name.starts_with("pmultishift.qb.")) {
2409 if (VecWidth == 128)
2410 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2411 else if (VecWidth == 256)
2412 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2413 else if (VecWidth == 512)
2414 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2415 else
2416 llvm_unreachable("Unexpected intrinsic");
2417 } else if (Name.starts_with("conflict.")) {
2418 if (Name[9] == 'd' && VecWidth == 128)
2419 IID = Intrinsic::x86_avx512_conflict_d_128;
2420 else if (Name[9] == 'd' && VecWidth == 256)
2421 IID = Intrinsic::x86_avx512_conflict_d_256;
2422 else if (Name[9] == 'd' && VecWidth == 512)
2423 IID = Intrinsic::x86_avx512_conflict_d_512;
2424 else if (Name[9] == 'q' && VecWidth == 128)
2425 IID = Intrinsic::x86_avx512_conflict_q_128;
2426 else if (Name[9] == 'q' && VecWidth == 256)
2427 IID = Intrinsic::x86_avx512_conflict_q_256;
2428 else if (Name[9] == 'q' && VecWidth == 512)
2429 IID = Intrinsic::x86_avx512_conflict_q_512;
2430 else
2431 llvm_unreachable("Unexpected intrinsic");
2432 } else if (Name.starts_with("pavg.")) {
2433 if (Name[5] == 'b' && VecWidth == 128)
2434 IID = Intrinsic::x86_sse2_pavg_b;
2435 else if (Name[5] == 'b' && VecWidth == 256)
2436 IID = Intrinsic::x86_avx2_pavg_b;
2437 else if (Name[5] == 'b' && VecWidth == 512)
2438 IID = Intrinsic::x86_avx512_pavg_b_512;
2439 else if (Name[5] == 'w' && VecWidth == 128)
2440 IID = Intrinsic::x86_sse2_pavg_w;
2441 else if (Name[5] == 'w' && VecWidth == 256)
2442 IID = Intrinsic::x86_avx2_pavg_w;
2443 else if (Name[5] == 'w' && VecWidth == 512)
2444 IID = Intrinsic::x86_avx512_pavg_w_512;
2445 else
2446 llvm_unreachable("Unexpected intrinsic");
2447 } else
2448 return false;
2449
2450 SmallVector<Value *, 4> Args(CI.args());
2451 Args.pop_back();
2452 Args.pop_back();
2453 Rep = Builder.CreateIntrinsic(IID, Args);
2454 unsigned NumArgs = CI.arg_size();
2455 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2456 CI.getArgOperand(NumArgs - 2));
2457 return true;
2458}
2459
2460/// Upgrade comment in call to inline asm that represents an objc retain release
2461/// marker.
2462void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2463 size_t Pos;
2464 if (AsmStr->find("mov\tfp") == 0 &&
2465 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2466 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2467 AsmStr->replace(Pos, 1, ";");
2468 }
2469}
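// e.g. a marker string like (sketch; exact text varies by target)
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// becomes
//   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"
// since only the single '#' character at Pos is replaced.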
2470
2471 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2472 Function *F, IRBuilder<> &Builder) {
2473 Value *Rep = nullptr;
2474
2475 if (Name == "abs.i" || Name == "abs.ll") {
2476 Value *Arg = CI->getArgOperand(0);
2477 Value *Neg = Builder.CreateNeg(Arg, "neg");
2478 Value *Cmp = Builder.CreateICmpSGE(
2479 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2480 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2481 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2482 Type *Ty = (Name == "abs.bf16")
2483 ? Builder.getBFloatTy()
2484 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2485 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2486 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2487 Rep = Builder.CreateBitCast(Abs, CI->getType());
2488 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2489 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2490 : Intrinsic::nvvm_fabs;
2491 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2492 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2493 Name.starts_with("atomic.load.add.f64.p")) {
2494 Value *Ptr = CI->getArgOperand(0);
2495 Value *Val = CI->getArgOperand(1);
2496 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2497 AtomicOrdering::SequentiallyConsistent);
2498 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2499 Name.starts_with("atomic.load.dec.32.p")) {
2500 Value *Ptr = CI->getArgOperand(0);
2501 Value *Val = CI->getArgOperand(1);
2502 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2503 : AtomicRMWInst::UDecWrap;
2504 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2505 AtomicOrdering::SequentiallyConsistent);
2506 } else if (Name.consume_front("max.") &&
2507 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2508 Name == "ui" || Name == "ull")) {
2509 Value *Arg0 = CI->getArgOperand(0);
2510 Value *Arg1 = CI->getArgOperand(1);
2511 Value *Cmp = Name.starts_with("u")
2512 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2513 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2514 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2515 } else if (Name.consume_front("min.") &&
2516 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2517 Name == "ui" || Name == "ull")) {
2518 Value *Arg0 = CI->getArgOperand(0);
2519 Value *Arg1 = CI->getArgOperand(1);
2520 Value *Cmp = Name.starts_with("u")
2521 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2522 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2523 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2524 } else if (Name == "clz.ll") {
2525 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2526 Value *Arg = CI->getArgOperand(0);
2527 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2528 {Arg, Builder.getFalse()},
2529 /*FMFSource=*/nullptr, "ctlz");
2530 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2531 } else if (Name == "popc.ll") {
2532 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2533 // i64.
2534 Value *Arg = CI->getArgOperand(0);
2535 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2536 Arg, /*FMFSource=*/nullptr, "ctpop");
2537 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2538 } else if (Name == "h2f") {
2539 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2540 {Builder.getFloatTy()}, CI->getArgOperand(0),
2541 /*FMFSource=*/nullptr, "h2f");
2542 } else if (Name.consume_front("bitcast.") &&
2543 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2544 Name == "d2ll")) {
2545 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2546 } else if (Name == "rotate.b32") {
2547 Value *Arg = CI->getOperand(0);
2548 Value *ShiftAmt = CI->getOperand(1);
2549 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2550 {Arg, Arg, ShiftAmt});
2551 } else if (Name == "rotate.b64") {
2552 Type *Int64Ty = Builder.getInt64Ty();
2553 Value *Arg = CI->getOperand(0);
2554 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2555 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2556 {Arg, Arg, ZExtShiftAmt});
2557 } else if (Name == "rotate.right.b64") {
2558 Type *Int64Ty = Builder.getInt64Ty();
2559 Value *Arg = CI->getOperand(0);
2560 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2561 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2562 {Arg, Arg, ZExtShiftAmt});
2563 } else if (Name == "swap.lo.hi.b64") {
2564 Type *Int64Ty = Builder.getInt64Ty();
2565 Value *Arg = CI->getOperand(0);
2566 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2567 {Arg, Arg, Builder.getInt64(32)});
2568 } else if ((Name.consume_front("ptr.gen.to.") &&
2569 consumeNVVMPtrAddrSpace(Name)) ||
2570 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2571 Name.starts_with(".to.gen"))) {
2572 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2573 } else if (Name.consume_front("ldg.global")) {
2574 Value *Ptr = CI->getArgOperand(0);
2575 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2576 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2577 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2578 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2579 MDNode *MD = MDNode::get(Builder.getContext(), {});
2580 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2581 return LD;
2582 } else if (Name == "tanh.approx.f32") {
2583 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2584 FastMathFlags FMF;
2585 FMF.setApproxFunc();
2586 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2587 FMF);
2588 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2589 Value *Arg =
2590 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2591 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2592 {}, {Arg});
2593 } else if (Name == "barrier") {
2594 Rep = Builder.CreateIntrinsic(
2595 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2596 {CI->getArgOperand(0), CI->getArgOperand(1)});
2597 } else if (Name == "barrier.sync") {
2598 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2599 {CI->getArgOperand(0)});
2600 } else if (Name == "barrier.sync.cnt") {
2601 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2602 {CI->getArgOperand(0), CI->getArgOperand(1)});
2603 } else {
2604 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2605 if (IID != Intrinsic::not_intrinsic &&
2606 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2607 rename(F);
2608 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2609 SmallVector<Value *, 2> Args;
2610 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2611 Value *Arg = CI->getArgOperand(I);
2612 Type *OldType = Arg->getType();
2613 Type *NewType = NewFn->getArg(I)->getType();
2614 Args.push_back(
2615 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2616 ? Builder.CreateBitCast(Arg, NewType)
2617 : Arg);
2618 }
2619 Rep = Builder.CreateCall(NewFn, Args);
2620 if (F->getReturnType()->isIntegerTy())
2621 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2622 }
2623 }
2624
2625 return Rep;
2626}
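// Example of one rewrite handled above: llvm.nvvm.rotate.b32 becomes a
// target-independent funnel shift (sketch):
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)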
2627
2628 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2629 IRBuilder<> &Builder) {
2630 LLVMContext &C = F->getContext();
2631 Value *Rep = nullptr;
2632
2633 if (Name.starts_with("sse4a.movnt.")) {
2635 Elts.push_back(
2636 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2637 MDNode *Node = MDNode::get(C, Elts);
2638
2639 Value *Arg0 = CI->getArgOperand(0);
2640 Value *Arg1 = CI->getArgOperand(1);
2641
2642 // Nontemporal (unaligned) store of the 0'th element of the float/double
2643 // vector.
2644 Value *Extract =
2645 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2646
2647 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2648 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2649 } else if (Name.starts_with("avx.movnt.") ||
2650 Name.starts_with("avx512.storent.")) {
2652 Elts.push_back(
2653 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2654 MDNode *Node = MDNode::get(C, Elts);
2655
2656 Value *Arg0 = CI->getArgOperand(0);
2657 Value *Arg1 = CI->getArgOperand(1);
2658
2659 StoreInst *SI = Builder.CreateAlignedStore(
2660 Arg1, Arg0,
2661 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2662 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2663 } else if (Name == "sse2.storel.dq") {
2664 Value *Arg0 = CI->getArgOperand(0);
2665 Value *Arg1 = CI->getArgOperand(1);
2666
2667 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2668 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2669 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2670 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2671 } else if (Name.starts_with("sse.storeu.") ||
2672 Name.starts_with("sse2.storeu.") ||
2673 Name.starts_with("avx.storeu.")) {
2674 Value *Arg0 = CI->getArgOperand(0);
2675 Value *Arg1 = CI->getArgOperand(1);
2676 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2677 } else if (Name == "avx512.mask.store.ss") {
2678 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2679 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2680 Mask, false);
2681 } else if (Name.starts_with("avx512.mask.store")) {
2682 // "avx512.mask.storeu." or "avx512.mask.store."
2683 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2684 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2685 CI->getArgOperand(2), Aligned);
2686 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2687 // Upgrade packed integer vector compare intrinsics to compare instructions.
2688 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2689 bool CmpEq = Name[9] == 'e';
2690 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2691 CI->getArgOperand(0), CI->getArgOperand(1));
2692 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2693 } else if (Name.starts_with("avx512.broadcastm")) {
2694 Type *ExtTy = Type::getInt32Ty(C);
2695 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2696 ExtTy = Type::getInt64Ty(C);
2697 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2698 ExtTy->getPrimitiveSizeInBits();
2699 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2700 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2701 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2702 Value *Vec = CI->getArgOperand(0);
2703 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2704 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2705 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2706 } else if (Name.starts_with("avx.sqrt.p") ||
2707 Name.starts_with("sse2.sqrt.p") ||
2708 Name.starts_with("sse.sqrt.p")) {
2709 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2710 {CI->getArgOperand(0)});
2711 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2712 if (CI->arg_size() == 4 &&
2713 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2714 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2715 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2716 : Intrinsic::x86_avx512_sqrt_pd_512;
2717
2718 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2719 Rep = Builder.CreateIntrinsic(IID, Args);
2720 } else {
2721 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2722 {CI->getArgOperand(0)});
2723 }
2724 Rep =
2725 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2726 } else if (Name.starts_with("avx512.ptestm") ||
2727 Name.starts_with("avx512.ptestnm")) {
2728 Value *Op0 = CI->getArgOperand(0);
2729 Value *Op1 = CI->getArgOperand(1);
2730 Value *Mask = CI->getArgOperand(2);
2731 Rep = Builder.CreateAnd(Op0, Op1);
2732 llvm::Type *Ty = Op0->getType();
2733 Value *Zero = llvm::Constant::getNullValue(Ty);
2734 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2735 ? ICmpInst::ICMP_NE
2736 : ICmpInst::ICMP_EQ;
2737 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2738 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2739 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2740 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2741 ->getNumElements();
2742 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2743 Rep =
2744 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2745 } else if (Name.starts_with("avx512.kunpck")) {
2746 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2747 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2748 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2749 int Indices[64];
2750 for (unsigned i = 0; i != NumElts; ++i)
2751 Indices[i] = i;
2752
2753 // First extract half of each vector. This gives better codegen than
2754 // doing it in a single shuffle.
2755 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2756 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2757 // Concat the vectors.
2758 // NOTE: Operands have to be swapped to match intrinsic definition.
2759 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2760 Rep = Builder.CreateBitCast(Rep, CI->getType());
2761 } else if (Name == "avx512.kand.w") {
2762 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2763 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2764 Rep = Builder.CreateAnd(LHS, RHS);
2765 Rep = Builder.CreateBitCast(Rep, CI->getType());
2766 } else if (Name == "avx512.kandn.w") {
2767 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2768 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2769 LHS = Builder.CreateNot(LHS);
2770 Rep = Builder.CreateAnd(LHS, RHS);
2771 Rep = Builder.CreateBitCast(Rep, CI->getType());
2772 } else if (Name == "avx512.kor.w") {
2773 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2774 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2775 Rep = Builder.CreateOr(LHS, RHS);
2776 Rep = Builder.CreateBitCast(Rep, CI->getType());
2777 } else if (Name == "avx512.kxor.w") {
2778 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2779 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2780 Rep = Builder.CreateXor(LHS, RHS);
2781 Rep = Builder.CreateBitCast(Rep, CI->getType());
2782 } else if (Name == "avx512.kxnor.w") {
2783 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2784 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2785 LHS = Builder.CreateNot(LHS);
2786 Rep = Builder.CreateXor(LHS, RHS);
2787 Rep = Builder.CreateBitCast(Rep, CI->getType());
2788 } else if (Name == "avx512.knot.w") {
2789 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2790 Rep = Builder.CreateNot(Rep);
2791 Rep = Builder.CreateBitCast(Rep, CI->getType());
2792 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2793 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2794 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2795 Rep = Builder.CreateOr(LHS, RHS);
2796 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2797 Value *C;
2798 if (Name[14] == 'c')
2799 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2800 else
2801 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2802 Rep = Builder.CreateICmpEQ(Rep, C);
2803 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2804 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2805 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2806 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2807 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2808 Type *I32Ty = Type::getInt32Ty(C);
2809 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2810 ConstantInt::get(I32Ty, 0));
2811 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2812 ConstantInt::get(I32Ty, 0));
2813 Value *EltOp;
2814 if (Name.contains(".add."))
2815 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2816 else if (Name.contains(".sub."))
2817 EltOp = Builder.CreateFSub(Elt0, Elt1);
2818 else if (Name.contains(".mul."))
2819 EltOp = Builder.CreateFMul(Elt0, Elt1);
2820 else
2821 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2822 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2823 ConstantInt::get(I32Ty, 0));
2824 } else if (Name.starts_with("avx512.mask.pcmp")) {
2825 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2826 bool CmpEq = Name[16] == 'e';
2827 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2828 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2829 Type *OpTy = CI->getArgOperand(0)->getType();
2830 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2831 Intrinsic::ID IID;
2832 switch (VecWidth) {
2833 default:
2834 llvm_unreachable("Unexpected intrinsic");
2835 case 128:
2836 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2837 break;
2838 case 256:
2839 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2840 break;
2841 case 512:
2842 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2843 break;
2844 }
2845
2846 Rep =
2847 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2848 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2849 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2850 Type *OpTy = CI->getArgOperand(0)->getType();
2851 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2852 unsigned EltWidth = OpTy->getScalarSizeInBits();
2853 Intrinsic::ID IID;
2854 if (VecWidth == 128 && EltWidth == 32)
2855 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2856 else if (VecWidth == 256 && EltWidth == 32)
2857 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2858 else if (VecWidth == 512 && EltWidth == 32)
2859 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2860 else if (VecWidth == 128 && EltWidth == 64)
2861 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2862 else if (VecWidth == 256 && EltWidth == 64)
2863 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2864 else if (VecWidth == 512 && EltWidth == 64)
2865 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2866 else
2867 llvm_unreachable("Unexpected intrinsic");
2868
2869 Rep =
2870 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2871 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2872 } else if (Name.starts_with("avx512.cmp.p")) {
2873 SmallVector<Value *, 4> Args(CI->args());
2874 Type *OpTy = Args[0]->getType();
2875 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2876 unsigned EltWidth = OpTy->getScalarSizeInBits();
2877 Intrinsic::ID IID;
2878 if (VecWidth == 128 && EltWidth == 32)
2879 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2880 else if (VecWidth == 256 && EltWidth == 32)
2881 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2882 else if (VecWidth == 512 && EltWidth == 32)
2883 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2884 else if (VecWidth == 128 && EltWidth == 64)
2885 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2886 else if (VecWidth == 256 && EltWidth == 64)
2887 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2888 else if (VecWidth == 512 && EltWidth == 64)
2889 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2890 else
2891 llvm_unreachable("Unexpected intrinsic");
2892
2894 if (VecWidth == 512)
2895 std::swap(Mask, Args.back());
2896 Args.push_back(Mask);
2897
2898 Rep = Builder.CreateIntrinsic(IID, Args);
2899 } else if (Name.starts_with("avx512.mask.cmp.")) {
2900 // Integer compare intrinsics.
2901 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2902 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2903 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2904 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2905 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2906 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2907 Name.starts_with("avx512.cvtw2mask.") ||
2908 Name.starts_with("avx512.cvtd2mask.") ||
2909 Name.starts_with("avx512.cvtq2mask.")) {
2910 Value *Op = CI->getArgOperand(0);
2911 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2912 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2913 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2914 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2915 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2916 Name.starts_with("avx512.mask.pabs")) {
2917 Rep = upgradeAbs(Builder, *CI);
2918 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2919 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2920 Name.starts_with("avx512.mask.pmaxs")) {
2921 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2922 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2923 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2924 Name.starts_with("avx512.mask.pmaxu")) {
2925 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2926 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2927 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2928 Name.starts_with("avx512.mask.pmins")) {
2929 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2930 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2931 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2932 Name.starts_with("avx512.mask.pminu")) {
2933 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2934 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2935 Name == "avx512.pmulu.dq.512" ||
2936 Name.starts_with("avx512.mask.pmulu.dq.")) {
2937 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2938 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2939 Name == "avx512.pmul.dq.512" ||
2940 Name.starts_with("avx512.mask.pmul.dq.")) {
2941 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2942 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2943 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2944 Rep =
2945 Builder.CreateSIToFP(CI->getArgOperand(1),
2946 cast<VectorType>(CI->getType())->getElementType());
2947 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2948 } else if (Name == "avx512.cvtusi2sd") {
2949 Rep =
2950 Builder.CreateUIToFP(CI->getArgOperand(1),
2951 cast<VectorType>(CI->getType())->getElementType());
2952 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2953 } else if (Name == "sse2.cvtss2sd") {
2954 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2955 Rep = Builder.CreateFPExt(
2956 Rep, cast<VectorType>(CI->getType())->getElementType());
2957 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2958 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2959 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2960 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2961 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2962 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2963 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2964 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2965 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2966 Name == "avx512.mask.cvtqq2ps.256" ||
2967 Name == "avx512.mask.cvtqq2ps.512" ||
2968 Name == "avx512.mask.cvtuqq2ps.256" ||
2969 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2970 Name == "avx.cvt.ps2.pd.256" ||
2971 Name == "avx512.mask.cvtps2pd.128" ||
2972 Name == "avx512.mask.cvtps2pd.256") {
2973 auto *DstTy = cast<FixedVectorType>(CI->getType());
2974 Rep = CI->getArgOperand(0);
2975 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2976
2977 unsigned NumDstElts = DstTy->getNumElements();
2978 if (NumDstElts < SrcTy->getNumElements()) {
2979 assert(NumDstElts == 2 && "Unexpected vector size");
2980 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2981 }
2982
2983 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2984 bool IsUnsigned = Name.contains("cvtu");
2985 if (IsPS2PD)
2986 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2987 else if (CI->arg_size() == 4 &&
2988 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2989 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2990 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2991 : Intrinsic::x86_avx512_sitofp_round;
2992 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2993 {Rep, CI->getArgOperand(3)});
2994 } else {
2995 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2996 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2997 }
2998
2999 if (CI->arg_size() >= 3)
3000 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3001 CI->getArgOperand(1));
3002 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3003 Name.starts_with("vcvtph2ps.")) {
3004 auto *DstTy = cast<FixedVectorType>(CI->getType());
3005 Rep = CI->getArgOperand(0);
3006 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3007 unsigned NumDstElts = DstTy->getNumElements();
3008 if (NumDstElts != SrcTy->getNumElements()) {
3009 assert(NumDstElts == 4 && "Unexpected vector size");
3010 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3011 }
3012 Rep = Builder.CreateBitCast(
3013 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3014 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3015 if (CI->arg_size() >= 3)
3016 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3017 CI->getArgOperand(1));
3018 } else if (Name.starts_with("avx512.mask.load")) {
3019 // "avx512.mask.loadu." or "avx512.mask.load."
3020 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3021 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3022 CI->getArgOperand(2), Aligned);
3023 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3024 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3025 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3026 ResultTy->getNumElements());
3027
3028 Rep = Builder.CreateIntrinsic(
3029 Intrinsic::masked_expandload, ResultTy,
3030 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3031 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3032 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3033 Value *MaskVec =
3034 getX86MaskVec(Builder, CI->getArgOperand(2),
3035 cast<FixedVectorType>(ResultTy)->getNumElements());
3036
3037 Rep = Builder.CreateIntrinsic(
3038 Intrinsic::masked_compressstore, ResultTy,
3039 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3040 } else if (Name.starts_with("avx512.mask.compress.") ||
3041 Name.starts_with("avx512.mask.expand.")) {
3042 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3043
3044 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3045 ResultTy->getNumElements());
3046
3047 bool IsCompress = Name[12] == 'c';
3048 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3049 : Intrinsic::x86_avx512_mask_expand;
3050 Rep = Builder.CreateIntrinsic(
3051 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3052 } else if (Name.starts_with("xop.vpcom")) {
3053 bool IsSigned;
3054 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3055 Name.ends_with("uq"))
3056 IsSigned = false;
3057 else if (Name.ends_with("b") || Name.ends_with("w") ||
3058 Name.ends_with("d") || Name.ends_with("q"))
3059 IsSigned = true;
3060 else
3061 llvm_unreachable("Unknown suffix");
3062
3063 unsigned Imm;
3064 if (CI->arg_size() == 3) {
3065 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3066 } else {
3067 Name = Name.substr(9); // strip off "xop.vpcom"
3068 if (Name.starts_with("lt"))
3069 Imm = 0;
3070 else if (Name.starts_with("le"))
3071 Imm = 1;
3072 else if (Name.starts_with("gt"))
3073 Imm = 2;
3074 else if (Name.starts_with("ge"))
3075 Imm = 3;
3076 else if (Name.starts_with("eq"))
3077 Imm = 4;
3078 else if (Name.starts_with("ne"))
3079 Imm = 5;
3080 else if (Name.starts_with("false"))
3081 Imm = 6;
3082 else if (Name.starts_with("true"))
3083 Imm = 7;
3084 else
3085 llvm_unreachable("Unknown condition");
3086 }
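// The xop.vpcom immediate encodes the predicate: 0=lt, 1=le, 2=gt, 3=ge,
// 4=eq, 5=ne, 6=false (always 0), 7=true (always 1).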
3087
3088 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3089 } else if (Name.starts_with("xop.vpcmov")) {
3090 Value *Sel = CI->getArgOperand(2);
3091 Value *NotSel = Builder.CreateNot(Sel);
3092 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3093 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3094 Rep = Builder.CreateOr(Sel0, Sel1);
3095 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3096 Name.starts_with("avx512.mask.prol")) {
3097 Rep = upgradeX86Rotate(Builder, *CI, false);
3098 } else if (Name.starts_with("avx512.pror") ||
3099 Name.starts_with("avx512.mask.pror")) {
3100 Rep = upgradeX86Rotate(Builder, *CI, true);
3101 } else if (Name.starts_with("avx512.vpshld.") ||
3102 Name.starts_with("avx512.mask.vpshld") ||
3103 Name.starts_with("avx512.maskz.vpshld")) {
3104 bool ZeroMask = Name[11] == 'z';
3105 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3106 } else if (Name.starts_with("avx512.vpshrd.") ||
3107 Name.starts_with("avx512.mask.vpshrd") ||
3108 Name.starts_with("avx512.maskz.vpshrd")) {
3109 bool ZeroMask = Name[11] == 'z';
3110 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3111 } else if (Name == "sse42.crc32.64.8") {
3112 Value *Trunc0 =
3113 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3114 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3115 {Trunc0, CI->getArgOperand(1)});
3116 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3117 } else if (Name.starts_with("avx.vbroadcast.s") ||
3118 Name.starts_with("avx512.vbroadcast.s")) {
3119 // Replace broadcasts with a series of insertelements.
3120 auto *VecTy = cast<FixedVectorType>(CI->getType());
3121 Type *EltTy = VecTy->getElementType();
3122 unsigned EltNum = VecTy->getNumElements();
3123 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3124 Type *I32Ty = Type::getInt32Ty(C);
3125 Rep = PoisonValue::get(VecTy);
3126 for (unsigned I = 0; I < EltNum; ++I)
3127 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
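// e.g. an <8 x float> avx.vbroadcast.ss variant becomes one scalar load
// followed by eight insertelements of the loaded value.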
3128 } else if (Name.starts_with("sse41.pmovsx") ||
3129 Name.starts_with("sse41.pmovzx") ||
3130 Name.starts_with("avx2.pmovsx") ||
3131 Name.starts_with("avx2.pmovzx") ||
3132 Name.starts_with("avx512.mask.pmovsx") ||
3133 Name.starts_with("avx512.mask.pmovzx")) {
3134 auto *DstTy = cast<FixedVectorType>(CI->getType());
3135 unsigned NumDstElts = DstTy->getNumElements();
3136
3137 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3138 SmallVector<int, 8> ShuffleMask(NumDstElts);
3139 for (unsigned i = 0; i != NumDstElts; ++i)
3140 ShuffleMask[i] = i;
3141
3142 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3143
3144 bool DoSext = Name.contains("pmovsx");
3145 Rep =
3146 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
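// e.g. sse41.pmovsxbd takes the low four i8 lanes of a <16 x i8> operand
// and sign-extends them to <4 x i32>.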
3147 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3148 if (CI->arg_size() == 3)
3149 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3150 CI->getArgOperand(1));
3151 } else if (Name == "avx512.mask.pmov.qd.256" ||
3152 Name == "avx512.mask.pmov.qd.512" ||
3153 Name == "avx512.mask.pmov.wb.256" ||
3154 Name == "avx512.mask.pmov.wb.512") {
3155 Type *Ty = CI->getArgOperand(1)->getType();
3156 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3157 Rep =
3158 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3159 } else if (Name.starts_with("avx.vbroadcastf128") ||
3160 Name == "avx2.vbroadcasti128") {
3161 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3162 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3163 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3164 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3165 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3166 if (NumSrcElts == 2)
3167 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3168 else
3169 Rep = Builder.CreateShuffleVector(Load,
3170 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3171 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3172 Name.starts_with("avx512.mask.shuf.f")) {
3173 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3174 Type *VT = CI->getType();
3175 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3176 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3177 unsigned ControlBitsMask = NumLanes - 1;
3178 unsigned NumControlBits = NumLanes / 2;
3179 SmallVector<int, 8> ShuffleMask(0);
3180
3181 for (unsigned l = 0; l != NumLanes; ++l) {
3182 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3183 // We actually need the other source.
3184 if (l >= NumLanes / 2)
3185 LaneMask += NumLanes;
3186 for (unsigned i = 0; i != NumElementsInLane; ++i)
3187 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3188 }
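// e.g. a 512-bit shuffle of 64-bit elements with Imm = 0xE4 builds
// <0, 1, 2, 3, 12, 13, 14, 15>: lanes 0-1 from the first source and
// lanes 2-3 from the second.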
3189 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3190 CI->getArgOperand(1), ShuffleMask);
3191 Rep =
3192 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3193 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3194 Name.starts_with("avx512.mask.broadcasti")) {
3195 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3196 ->getNumElements();
3197 unsigned NumDstElts =
3198 cast<FixedVectorType>(CI->getType())->getNumElements();
3199
3200 SmallVector<int, 8> ShuffleMask(NumDstElts);
3201 for (unsigned i = 0; i != NumDstElts; ++i)
3202 ShuffleMask[i] = i % NumSrcElts;
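// e.g. broadcasting a <4 x float> subvector into <16 x float> repeats
// the index pattern <0, 1, 2, 3> four times.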
3203
3204 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3205 CI->getArgOperand(0), ShuffleMask);
3206 Rep =
3207 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3208 } else if (Name.starts_with("avx2.pbroadcast") ||
3209 Name.starts_with("avx2.vbroadcast") ||
3210 Name.starts_with("avx512.pbroadcast") ||
3211 Name.starts_with("avx512.mask.broadcast.s")) {
3212 // Replace vp?broadcasts with a vector shuffle.
3213 Value *Op = CI->getArgOperand(0);
3214 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3215 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3216 SmallVector<int, 8> M;
3217 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3218 Rep = Builder.CreateShuffleVector(Op, M);
3219
3220 if (CI->arg_size() == 3)
3221 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3222 CI->getArgOperand(1));
3223 } else if (Name.starts_with("sse2.padds.") ||
3224 Name.starts_with("avx2.padds.") ||
3225 Name.starts_with("avx512.padds.") ||
3226 Name.starts_with("avx512.mask.padds.")) {
3227 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3228 } else if (Name.starts_with("sse2.psubs.") ||
3229 Name.starts_with("avx2.psubs.") ||
3230 Name.starts_with("avx512.psubs.") ||
3231 Name.starts_with("avx512.mask.psubs.")) {
3232 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3233 } else if (Name.starts_with("sse2.paddus.") ||
3234 Name.starts_with("avx2.paddus.") ||
3235 Name.starts_with("avx512.mask.paddus.")) {
3236 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3237 } else if (Name.starts_with("sse2.psubus.") ||
3238 Name.starts_with("avx2.psubus.") ||
3239 Name.starts_with("avx512.mask.psubus.")) {
3240 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3241 } else if (Name.starts_with("avx512.mask.palignr.")) {
3242 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3243 CI->getArgOperand(1), CI->getArgOperand(2),
3244 CI->getArgOperand(3), CI->getArgOperand(4),
3245 false);
3246 } else if (Name.starts_with("avx512.mask.valign.")) {
3247 Rep = upgradeX86ALIGNIntrinsics(
3248 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3249 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3250 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3251 // 128/256-bit shift left specified in bits.
3252 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3253 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3254 Shift / 8); // Shift is in bits.
3255 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3256 // 128/256-bit shift right specified in bits.
3257 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3258 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3259 Shift / 8); // Shift is in bits.
3260 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3261 Name == "avx512.psll.dq.512") {
3262 // 128/256/512-bit shift left specified in bytes.
3263 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3264 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3265 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3266 Name == "avx512.psrl.dq.512") {
3267 // 128/256/512-bit shift right specified in bytes.
3268 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3269 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3270 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3271 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3272 Name.starts_with("avx2.pblendd.")) {
3273 Value *Op0 = CI->getArgOperand(0);
3274 Value *Op1 = CI->getArgOperand(1);
3275 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3276 auto *VecTy = cast<FixedVectorType>(CI->getType());
3277 unsigned NumElts = VecTy->getNumElements();
3278
3279 SmallVector<int, 16> Idxs(NumElts);
3280 for (unsigned i = 0; i != NumElts; ++i)
3281 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
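// e.g. an <8 x i16> pblendw with Imm = 0x0F yields
// <8, 9, 10, 11, 4, 5, 6, 7>: set bits select the lane from Op1.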
3282
3283 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3284 } else if (Name.starts_with("avx.vinsertf128.") ||
3285 Name == "avx2.vinserti128" ||
3286 Name.starts_with("avx512.mask.insert")) {
3287 Value *Op0 = CI->getArgOperand(0);
3288 Value *Op1 = CI->getArgOperand(1);
3289 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3290 unsigned DstNumElts =
3291 cast<FixedVectorType>(CI->getType())->getNumElements();
3292 unsigned SrcNumElts =
3293 cast<FixedVectorType>(Op1->getType())->getNumElements();
3294 unsigned Scale = DstNumElts / SrcNumElts;
3295
3296 // Mask off the high bits of the immediate value; hardware ignores those.
3297 Imm = Imm % Scale;
3298
3299 // Extend the second operand into a vector the size of the destination.
3300 SmallVector<int, 8> Idxs(DstNumElts);
3301 for (unsigned i = 0; i != SrcNumElts; ++i)
3302 Idxs[i] = i;
3303 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3304 Idxs[i] = SrcNumElts;
3305 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3306
3307 // Insert the second operand into the first operand.
3308
3309 // Note that there is no guarantee that instruction lowering will actually
3310 // produce a vinsertf128 instruction for the created shuffles. In
3311 // particular, the 0 immediate case involves no lane changes, so it can
3312 // be handled as a blend.
3313
3314 // Example of shuffle mask for 32-bit elements:
3315 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3316 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3317
3318 // First fill with the identity mask.
3319 for (unsigned i = 0; i != DstNumElts; ++i)
3320 Idxs[i] = i;
3321 // Then replace the elements where we need to insert.
3322 for (unsigned i = 0; i != SrcNumElts; ++i)
3323 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3324 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3325
3326 // If the intrinsic has a mask operand, handle that.
3327 if (CI->arg_size() == 5)
3328 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3329 CI->getArgOperand(3));
3330 } else if (Name.starts_with("avx.vextractf128.") ||
3331 Name == "avx2.vextracti128" ||
3332 Name.starts_with("avx512.mask.vextract")) {
3333 Value *Op0 = CI->getArgOperand(0);
3334 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3335 unsigned DstNumElts =
3336 cast<FixedVectorType>(CI->getType())->getNumElements();
3337 unsigned SrcNumElts =
3338 cast<FixedVectorType>(Op0->getType())->getNumElements();
3339 unsigned Scale = SrcNumElts / DstNumElts;
3340
3341 // Mask off the high bits of the immediate value; hardware ignores those.
3342 Imm = Imm % Scale;
3343
3344 // Get indexes for the subvector of the input vector.
3345 SmallVector<int, 8> Idxs(DstNumElts);
3346 for (unsigned i = 0; i != DstNumElts; ++i) {
3347 Idxs[i] = i + (Imm * DstNumElts);
3348 }
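// e.g. extracting the upper half of an <8 x float> source (Imm = 1)
// uses indices <4, 5, 6, 7>.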
3349 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3350
3351 // If the intrinsic has a mask operand, handle that.
3352 if (CI->arg_size() == 4)
3353 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3354 CI->getArgOperand(2));
3355 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3356 Name.starts_with("avx512.mask.perm.di.")) {
3357 Value *Op0 = CI->getArgOperand(0);
3358 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3359 auto *VecTy = cast<FixedVectorType>(CI->getType());
3360 unsigned NumElts = VecTy->getNumElements();
3361
3362 SmallVector<int, 8> Idxs(NumElts);
3363 for (unsigned i = 0; i != NumElts; ++i)
3364 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
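// e.g. Imm = 0x4E (binary 01001110) swaps the two 128-bit halves:
// <2, 3, 0, 1> for four 64-bit elements.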
3365
3366 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3367
3368 if (CI->arg_size() == 4)
3369 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3370 CI->getArgOperand(2));
3371 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3372 // The immediate permute control byte looks like this:
3373 // [1:0] - select 128 bits from sources for low half of destination
3374 // [2] - ignore
3375 // [3] - zero low half of destination
3376 // [5:4] - select 128 bits from sources for high half of destination
3377 // [6] - ignore
3378 // [7] - zero high half of destination
3379
3380 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3381
3382 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3383 unsigned HalfSize = NumElts / 2;
3384 SmallVector<int, 8> ShuffleMask(NumElts);
3385
3386 // Determine which operand(s) are actually in use for this instruction.
3387 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3388 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3389
3390 // If needed, replace operands based on zero mask.
3391 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3392 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3393
3394 // Permute low half of result.
3395 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3396 for (unsigned i = 0; i < HalfSize; ++i)
3397 ShuffleMask[i] = StartIndex + i;
3398
3399 // Permute high half of result.
3400 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3401 for (unsigned i = 0; i < HalfSize; ++i)
3402 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
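// e.g. the common Imm = 0x21 case on <8 x float> produces
// <4, 5, 6, 7, 8, 9, 10, 11>: the high half of the first source followed
// by the low half of the second.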
3403
3404 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3405
3406 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3407 Name.starts_with("avx512.mask.vpermil.p") ||
3408 Name.starts_with("avx512.mask.pshuf.d.")) {
3409 Value *Op0 = CI->getArgOperand(0);
3410 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3411 auto *VecTy = cast<FixedVectorType>(CI->getType());
3412 unsigned NumElts = VecTy->getNumElements();
3413 // Calculate the size of each index in the immediate.
3414 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3415 unsigned IdxMask = ((1 << IdxSize) - 1);
3416
3417 SmallVector<int, 8> Idxs(NumElts);
3418 // Look up the bits for this element, wrapping around the immediate every
3419 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3420 // to offset by the first index of each group.
3421 for (unsigned i = 0; i != NumElts; ++i)
3422 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
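// e.g. sse2.pshuf.d with Imm = 0x1B reverses a <4 x i32>: <3, 2, 1, 0>.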
3423
3424 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3425
3426 if (CI->arg_size() == 4)
3427 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3428 CI->getArgOperand(2));
3429 } else if (Name == "sse2.pshufl.w" ||
3430 Name.starts_with("avx512.mask.pshufl.w.")) {
3431 Value *Op0 = CI->getArgOperand(0);
3432 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3433 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3434
3435 SmallVector<int, 16> Idxs(NumElts);
3436 for (unsigned l = 0; l != NumElts; l += 8) {
3437 for (unsigned i = 0; i != 4; ++i)
3438 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3439 for (unsigned i = 4; i != 8; ++i)
3440 Idxs[i + l] = i + l;
3441 }
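// e.g. Imm = 0x1B reverses the low four words of each 128-bit lane:
// <3, 2, 1, 0, 4, 5, 6, 7> for an <8 x i16> vector.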
3442
3443 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3444
3445 if (CI->arg_size() == 4)
3446 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3447 CI->getArgOperand(2));
3448 } else if (Name == "sse2.pshufh.w" ||
3449 Name.starts_with("avx512.mask.pshufh.w.")) {
3450 Value *Op0 = CI->getArgOperand(0);
3451 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3452 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3453
3454 SmallVector<int, 16> Idxs(NumElts);
3455 for (unsigned l = 0; l != NumElts; l += 8) {
3456 for (unsigned i = 0; i != 4; ++i)
3457 Idxs[i + l] = i + l;
3458 for (unsigned i = 0; i != 4; ++i)
3459 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3460 }
3461
3462 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3463
3464 if (CI->arg_size() == 4)
3465 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3466 CI->getArgOperand(2));
3467 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3468 Value *Op0 = CI->getArgOperand(0);
3469 Value *Op1 = CI->getArgOperand(1);
3470 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3471 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3472
3473 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3474 unsigned HalfLaneElts = NumLaneElts / 2;
3475
3476 SmallVector<int, 16> Idxs(NumElts);
3477 for (unsigned i = 0; i != NumElts; ++i) {
3478 // Base index is the starting element of the lane.
3479 Idxs[i] = i - (i % NumLaneElts);
3480 // If we are halfway through the lane, switch to the other source.
3481 if ((i % NumLaneElts) >= HalfLaneElts)
3482 Idxs[i] += NumElts;
3483 // Now select the specific element by adding HalfLaneElts bits from
3484 // the immediate, wrapping around the immediate every 8 bits.
3485 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3486 }
3487
3488 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3489
3490 Rep =
3491 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3492 } else if (Name.starts_with("avx512.mask.movddup") ||
3493 Name.starts_with("avx512.mask.movshdup") ||
3494 Name.starts_with("avx512.mask.movsldup")) {
3495 Value *Op0 = CI->getArgOperand(0);
3496 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3497 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3498
3499 unsigned Offset = 0;
3500 if (Name.starts_with("avx512.mask.movshdup."))
3501 Offset = 1;
3502
3503 SmallVector<int, 16> Idxs(NumElts);
3504 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3505 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3506 Idxs[i + l + 0] = i + l + Offset;
3507 Idxs[i + l + 1] = i + l + Offset;
3508 }
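// movsldup (Offset 0) repeats the even lanes <0, 0, 2, 2, ...>, movshdup
// (Offset 1) the odd lanes <1, 1, 3, 3, ...>; movddup does the same for
// even 64-bit lanes, so it also uses Offset 0.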
3509
3510 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3511
3512 Rep =
3513 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3514 } else if (Name.starts_with("avx512.mask.punpckl") ||
3515 Name.starts_with("avx512.mask.unpckl.")) {
3516 Value *Op0 = CI->getArgOperand(0);
3517 Value *Op1 = CI->getArgOperand(1);
3518 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3519 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3520
3521 SmallVector<int, 64> Idxs(NumElts);
3522 for (int l = 0; l != NumElts; l += NumLaneElts)
3523 for (int i = 0; i != NumLaneElts; ++i)
3524 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
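// e.g. a <4 x i32> unpckl interleaves the low halves of both sources:
// <0, 4, 1, 5>.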
3525
3526 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3527
3528 Rep =
3529 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3530 } else if (Name.starts_with("avx512.mask.punpckh") ||
3531 Name.starts_with("avx512.mask.unpckh.")) {
3532 Value *Op0 = CI->getArgOperand(0);
3533 Value *Op1 = CI->getArgOperand(1);
3534 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3535 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3536
3537 SmallVector<int, 64> Idxs(NumElts);
3538 for (int l = 0; l != NumElts; l += NumLaneElts)
3539 for (int i = 0; i != NumLaneElts; ++i)
3540 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
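// e.g. a <4 x i32> unpckh interleaves the high halves of both sources:
// <2, 6, 3, 7>.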
3541
3542 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3543
3544 Rep =
3545 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3546 } else if (Name.starts_with("avx512.mask.and.") ||
3547 Name.starts_with("avx512.mask.pand.")) {
3548 VectorType *FTy = cast<VectorType>(CI->getType());
3549 VectorType *ITy = VectorType::getInteger(FTy);
3550 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3551 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3552 Rep = Builder.CreateBitCast(Rep, FTy);
3553 Rep =
3554 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3555 } else if (Name.starts_with("avx512.mask.andn.") ||
3556 Name.starts_with("avx512.mask.pandn.")) {
3557 VectorType *FTy = cast<VectorType>(CI->getType());
3558 VectorType *ITy = VectorType::getInteger(FTy);
3559 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3560 Rep = Builder.CreateAnd(Rep,
3561 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3562 Rep = Builder.CreateBitCast(Rep, FTy);
3563 Rep =
3564 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3565 } else if (Name.starts_with("avx512.mask.or.") ||
3566 Name.starts_with("avx512.mask.por.")) {
3567 VectorType *FTy = cast<VectorType>(CI->getType());
3568 VectorType *ITy = VectorType::getInteger(FTy);
3569 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3570 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3571 Rep = Builder.CreateBitCast(Rep, FTy);
3572 Rep =
3573 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3574 } else if (Name.starts_with("avx512.mask.xor.") ||
3575 Name.starts_with("avx512.mask.pxor.")) {
3576 VectorType *FTy = cast<VectorType>(CI->getType());
3577 VectorType *ITy = VectorType::getInteger(FTy);
3578 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3579 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3580 Rep = Builder.CreateBitCast(Rep, FTy);
3581 Rep =
3582 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3583 } else if (Name.starts_with("avx512.mask.padd.")) {
3584 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3585 Rep =
3586 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3587 } else if (Name.starts_with("avx512.mask.psub.")) {
3588 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3589 Rep =
3590 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3591 } else if (Name.starts_with("avx512.mask.pmull.")) {
3592 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3593 Rep =
3594 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3595 } else if (Name.starts_with("avx512.mask.add.p")) {
3596 if (Name.ends_with(".512")) {
3597 Intrinsic::ID IID;
3598 if (Name[17] == 's')
3599 IID = Intrinsic::x86_avx512_add_ps_512;
3600 else
3601 IID = Intrinsic::x86_avx512_add_pd_512;
3602
3603 Rep = Builder.CreateIntrinsic(
3604 IID,
3605 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3606 } else {
3607 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3608 }
3609 Rep =
3610 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3611 } else if (Name.starts_with("avx512.mask.div.p")) {
3612 if (Name.ends_with(".512")) {
3613 Intrinsic::ID IID;
3614 if (Name[17] == 's')
3615 IID = Intrinsic::x86_avx512_div_ps_512;
3616 else
3617 IID = Intrinsic::x86_avx512_div_pd_512;
3618
3619 Rep = Builder.CreateIntrinsic(
3620 IID,
3621 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3622 } else {
3623 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3624 }
3625 Rep =
3626 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3627 } else if (Name.starts_with("avx512.mask.mul.p")) {
3628 if (Name.ends_with(".512")) {
3629 Intrinsic::ID IID;
3630 if (Name[17] == 's')
3631 IID = Intrinsic::x86_avx512_mul_ps_512;
3632 else
3633 IID = Intrinsic::x86_avx512_mul_pd_512;
3634
3635 Rep = Builder.CreateIntrinsic(
3636 IID,
3637 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3638 } else {
3639 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3640 }
3641 Rep =
3642 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3643 } else if (Name.starts_with("avx512.mask.sub.p")) {
3644 if (Name.ends_with(".512")) {
3645 Intrinsic::ID IID;
3646 if (Name[17] == 's')
3647 IID = Intrinsic::x86_avx512_sub_ps_512;
3648 else
3649 IID = Intrinsic::x86_avx512_sub_pd_512;
3650
3651 Rep = Builder.CreateIntrinsic(
3652 IID,
3653 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3654 } else {
3655 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3656 }
3657 Rep =
3658 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3659 } else if ((Name.starts_with("avx512.mask.max.p") ||
3660 Name.starts_with("avx512.mask.min.p")) &&
3661 Name.drop_front(18) == ".512") {
3662 bool IsDouble = Name[17] == 'd';
3663 bool IsMin = Name[13] == 'i';
3664 static const Intrinsic::ID MinMaxTbl[2][2] = {
3665 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3666 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3667 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3668
3669 Rep = Builder.CreateIntrinsic(
3670 IID,
3671 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3672 Rep =
3673 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3674 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3675 Rep =
3676 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3677 {CI->getArgOperand(0), Builder.getInt1(false)});
3678 Rep =
3679 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3680 } else if (Name.starts_with("avx512.mask.psll")) {
3681 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3682 bool IsVariable = Name[16] == 'v';
3683 char Size = Name[16] == '.' ? Name[17]
3684 : Name[17] == '.' ? Name[18]
3685 : Name[18] == '.' ? Name[19]
3686 : Name[20];
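// e.g. "avx512.mask.psll.d.128" gives Size 'd', "avx512.mask.pslli.d"
// is an immediate shift, and "avx512.mask.psllv8.si" a variable shift
// (see the name forms in the comments below).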
3687
3688 Intrinsic::ID IID;
3689 if (IsVariable && Name[17] != '.') {
3690 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3691 IID = Intrinsic::x86_avx2_psllv_q;
3692 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3693 IID = Intrinsic::x86_avx2_psllv_q_256;
3694 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3695 IID = Intrinsic::x86_avx2_psllv_d;
3696 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3697 IID = Intrinsic::x86_avx2_psllv_d_256;
3698 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3699 IID = Intrinsic::x86_avx512_psllv_w_128;
3700 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3701 IID = Intrinsic::x86_avx512_psllv_w_256;
3702 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3703 IID = Intrinsic::x86_avx512_psllv_w_512;
3704 else
3705 llvm_unreachable("Unexpected size");
3706 } else if (Name.ends_with(".128")) {
3707 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3708 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3709 : Intrinsic::x86_sse2_psll_d;
3710 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3711 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3712 : Intrinsic::x86_sse2_psll_q;
3713 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3714 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3715 : Intrinsic::x86_sse2_psll_w;
3716 else
3717 llvm_unreachable("Unexpected size");
3718 } else if (Name.ends_with(".256")) {
3719 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3720 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3721 : Intrinsic::x86_avx2_psll_d;
3722 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3723 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3724 : Intrinsic::x86_avx2_psll_q;
3725 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3726 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3727 : Intrinsic::x86_avx2_psll_w;
3728 else
3729 llvm_unreachable("Unexpected size");
3730 } else {
3731 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3732 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3733 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3734 : Intrinsic::x86_avx512_psll_d_512;
3735 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3736 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3737 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3738 : Intrinsic::x86_avx512_psll_q_512;
3739 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3740 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3741 : Intrinsic::x86_avx512_psll_w_512;
3742 else
3743 llvm_unreachable("Unexpected size");
3744 }
3745
3746 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3747 } else if (Name.starts_with("avx512.mask.psrl")) {
3748 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3749 bool IsVariable = Name[16] == 'v';
3750 char Size = Name[16] == '.' ? Name[17]
3751 : Name[17] == '.' ? Name[18]
3752 : Name[18] == '.' ? Name[19]
3753 : Name[20];
3754
3755 Intrinsic::ID IID;
3756 if (IsVariable && Name[17] != '.') {
3757 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3758 IID = Intrinsic::x86_avx2_psrlv_q;
3759 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3760 IID = Intrinsic::x86_avx2_psrlv_q_256;
3761 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3762 IID = Intrinsic::x86_avx2_psrlv_d;
3763 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3764 IID = Intrinsic::x86_avx2_psrlv_d_256;
3765 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3766 IID = Intrinsic::x86_avx512_psrlv_w_128;
3767 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3768 IID = Intrinsic::x86_avx512_psrlv_w_256;
3769 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3770 IID = Intrinsic::x86_avx512_psrlv_w_512;
3771 else
3772 llvm_unreachable("Unexpected size");
3773 } else if (Name.ends_with(".128")) {
3774 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3775 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3776 : Intrinsic::x86_sse2_psrl_d;
3777 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3778 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3779 : Intrinsic::x86_sse2_psrl_q;
3780 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3781 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3782 : Intrinsic::x86_sse2_psrl_w;
3783 else
3784 llvm_unreachable("Unexpected size");
3785 } else if (Name.ends_with(".256")) {
3786 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3787 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3788 : Intrinsic::x86_avx2_psrl_d;
3789 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3790 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3791 : Intrinsic::x86_avx2_psrl_q;
3792 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3793 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3794 : Intrinsic::x86_avx2_psrl_w;
3795 else
3796 llvm_unreachable("Unexpected size");
3797 } else {
3798 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3799 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3800 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3801 : Intrinsic::x86_avx512_psrl_d_512;
3802 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3803 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3804 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3805 : Intrinsic::x86_avx512_psrl_q_512;
3806 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3807 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3808 : Intrinsic::x86_avx512_psrl_w_512;
3809 else
3810 llvm_unreachable("Unexpected size");
3811 }
3812
3813 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3814 } else if (Name.starts_with("avx512.mask.psra")) {
3815 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3816 bool IsVariable = Name[16] == 'v';
3817 char Size = Name[16] == '.' ? Name[17]
3818 : Name[17] == '.' ? Name[18]
3819 : Name[18] == '.' ? Name[19]
3820 : Name[20];
3821
3822 Intrinsic::ID IID;
3823 if (IsVariable && Name[17] != '.') {
3824 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3825 IID = Intrinsic::x86_avx2_psrav_d;
3826 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3827 IID = Intrinsic::x86_avx2_psrav_d_256;
3828 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3829 IID = Intrinsic::x86_avx512_psrav_w_128;
3830 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3831 IID = Intrinsic::x86_avx512_psrav_w_256;
3832 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3833 IID = Intrinsic::x86_avx512_psrav_w_512;
3834 else
3835 llvm_unreachable("Unexpected size");
3836 } else if (Name.ends_with(".128")) {
3837 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3838 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3839 : Intrinsic::x86_sse2_psra_d;
3840 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3841 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3842 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3843 : Intrinsic::x86_avx512_psra_q_128;
3844 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3845 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3846 : Intrinsic::x86_sse2_psra_w;
3847 else
3848 llvm_unreachable("Unexpected size");
3849 } else if (Name.ends_with(".256")) {
3850 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3851 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3852 : Intrinsic::x86_avx2_psra_d;
3853 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3854 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3855 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3856 : Intrinsic::x86_avx512_psra_q_256;
3857 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3858 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3859 : Intrinsic::x86_avx2_psra_w;
3860 else
3861 llvm_unreachable("Unexpected size");
3862 } else {
3863 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3864 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3865 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3866 : Intrinsic::x86_avx512_psra_d_512;
3867 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3868 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3869 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3870 : Intrinsic::x86_avx512_psra_q_512;
3871 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3872 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3873 : Intrinsic::x86_avx512_psra_w_512;
3874 else
3875 llvm_unreachable("Unexpected size");
3876 }
3877
3878 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3879 } else if (Name.starts_with("avx512.mask.move.s")) {
3880 Rep = upgradeMaskedMove(Builder, *CI);
3881 } else if (Name.starts_with("avx512.cvtmask2")) {
3882 Rep = upgradeMaskToInt(Builder, *CI);
3883 } else if (Name.ends_with(".movntdqa")) {
3884 MDNode *Node = MDNode::get(
3885 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3886
3887 LoadInst *LI = Builder.CreateAlignedLoad(
3888 CI->getType(), CI->getArgOperand(0),
3889 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3890 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3891 Rep = LI;
3892 } else if (Name.starts_with("fma.vfmadd.") ||
3893 Name.starts_with("fma.vfmsub.") ||
3894 Name.starts_with("fma.vfnmadd.") ||
3895 Name.starts_with("fma.vfnmsub.")) {
3896 bool NegMul = Name[6] == 'n';
3897 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3898 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3899
3900 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3901 CI->getArgOperand(2)};
3902
3903 if (IsScalar) {
3904 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3905 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3906 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3907 }
3908
3909 if (NegMul && !IsScalar)
3910 Ops[0] = Builder.CreateFNeg(Ops[0]);
3911 if (NegMul && IsScalar)
3912 Ops[1] = Builder.CreateFNeg(Ops[1]);
3913 if (NegAcc)
3914 Ops[2] = Builder.CreateFNeg(Ops[2]);
3915
3916 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3917
3918 if (IsScalar)
3919 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3920 } else if (Name.starts_with("fma4.vfmadd.s")) {
3921 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3922 CI->getArgOperand(2)};
3923
3924 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3925 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3926 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3927
3928 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3929
3930 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3931 Rep, (uint64_t)0);
3932 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3933 Name.starts_with("avx512.maskz.vfmadd.s") ||
3934 Name.starts_with("avx512.mask3.vfmadd.s") ||
3935 Name.starts_with("avx512.mask3.vfmsub.s") ||
3936 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3937 bool IsMask3 = Name[11] == '3';
3938 bool IsMaskZ = Name[11] == 'z';
3939 // Drop the "avx512.mask." to make it easier.
3940 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3941 bool NegMul = Name[2] == 'n';
3942 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3943
3944 Value *A = CI->getArgOperand(0);
3945 Value *B = CI->getArgOperand(1);
3946 Value *C = CI->getArgOperand(2);
3947
3948 if (NegMul && (IsMask3 || IsMaskZ))
3949 A = Builder.CreateFNeg(A);
3950 if (NegMul && !(IsMask3 || IsMaskZ))
3951 B = Builder.CreateFNeg(B);
3952 if (NegAcc)
3953 C = Builder.CreateFNeg(C);
3954
3955 A = Builder.CreateExtractElement(A, (uint64_t)0);
3956 B = Builder.CreateExtractElement(B, (uint64_t)0);
3957 C = Builder.CreateExtractElement(C, (uint64_t)0);
3958
3959 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3960 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3961 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3962
3963 Intrinsic::ID IID;
3964 if (Name.back() == 'd')
3965 IID = Intrinsic::x86_avx512_vfmadd_f64;
3966 else
3967 IID = Intrinsic::x86_avx512_vfmadd_f32;
3968 Rep = Builder.CreateIntrinsic(IID, Ops);
3969 } else {
3970 Rep = Builder.CreateFMA(A, B, C);
3971 }
3972
3973 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3974 : IsMask3 ? C
3975 : A;
3976
3977 // For Mask3 with NegAcc, we need to create a new extractelement that
3978 // avoids the negation above.
3979 if (NegAcc && IsMask3)
3980 PassThru =
3981 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3982
3983 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3984 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3985 (uint64_t)0);
3986 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3987 Name.starts_with("avx512.mask.vfnmadd.p") ||
3988 Name.starts_with("avx512.mask.vfnmsub.p") ||
3989 Name.starts_with("avx512.mask3.vfmadd.p") ||
3990 Name.starts_with("avx512.mask3.vfmsub.p") ||
3991 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3992 Name.starts_with("avx512.maskz.vfmadd.p")) {
3993 bool IsMask3 = Name[11] == '3';
3994 bool IsMaskZ = Name[11] == 'z';
3995 // Drop the "avx512.mask." to make it easier.
3996 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3997 bool NegMul = Name[2] == 'n';
3998 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3999
4000 Value *A = CI->getArgOperand(0);
4001 Value *B = CI->getArgOperand(1);
4002 Value *C = CI->getArgOperand(2);
4003
4004 if (NegMul && (IsMask3 || IsMaskZ))
4005 A = Builder.CreateFNeg(A);
4006 if (NegMul && !(IsMask3 || IsMaskZ))
4007 B = Builder.CreateFNeg(B);
4008 if (NegAcc)
4009 C = Builder.CreateFNeg(C);
4010
4011 if (CI->arg_size() == 5 &&
4012 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4013 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4014 Intrinsic::ID IID;
4015 // Check the character before ".512" in the string.
4016 if (Name[Name.size() - 5] == 's')
4017 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4018 else
4019 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4020
4021 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4022 } else {
4023 Rep = Builder.CreateFMA(A, B, C);
4024 }
4025
4026 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4027 : IsMask3 ? CI->getArgOperand(2)
4028 : CI->getArgOperand(0);
4029
4030 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4031 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4032 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4033 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4034 Intrinsic::ID IID;
4035 if (VecWidth == 128 && EltWidth == 32)
4036 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4037 else if (VecWidth == 256 && EltWidth == 32)
4038 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4039 else if (VecWidth == 128 && EltWidth == 64)
4040 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4041 else if (VecWidth == 256 && EltWidth == 64)
4042 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4043 else
4044 llvm_unreachable("Unexpected intrinsic");
4045
4046 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4047 CI->getArgOperand(2)};
4048 Ops[2] = Builder.CreateFNeg(Ops[2]);
4049 Rep = Builder.CreateIntrinsic(IID, Ops);
4050 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4051 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4052 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4053 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4054 bool IsMask3 = Name[11] == '3';
4055 bool IsMaskZ = Name[11] == 'z';
4056 // Drop the "avx512.mask." to make it easier.
4057 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4058 bool IsSubAdd = Name[3] == 's';
4059 if (CI->arg_size() == 5) {
4060 Intrinsic::ID IID;
4061 // Check the character before ".512" in the string.
4062 if (Name[Name.size() - 5] == 's')
4063 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4064 else
4065 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4066
4067 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4068 CI->getArgOperand(2), CI->getArgOperand(4)};
4069 if (IsSubAdd)
4070 Ops[2] = Builder.CreateFNeg(Ops[2]);
4071
4072 Rep = Builder.CreateIntrinsic(IID, Ops);
4073 } else {
4074 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4075
4076 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4077 CI->getArgOperand(2)};
4078
4079 Function *FMA = Intrinsic::getOrInsertDeclaration(
4080 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4081 Value *Odd = Builder.CreateCall(FMA, Ops);
4082 Ops[2] = Builder.CreateFNeg(Ops[2]);
4083 Value *Even = Builder.CreateCall(FMA, Ops);
4084
4085 if (IsSubAdd)
4086 std::swap(Even, Odd);
4087
4088 SmallVector<int, 32> Idxs(NumElts);
4089 for (int i = 0; i != NumElts; ++i)
4090 Idxs[i] = i + (i % 2) * NumElts;
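// e.g. for four elements this is <0, 5, 2, 7>: even result lanes come
// from Even, odd result lanes from Odd.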
4091
4092 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4093 }
4094
4095 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4096 : IsMask3 ? CI->getArgOperand(2)
4097 : CI->getArgOperand(0);
4098
4099 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4100 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4101 Name.starts_with("avx512.maskz.pternlog.")) {
4102 bool ZeroMask = Name[11] == 'z';
4103 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4104 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4105 Intrinsic::ID IID;
4106 if (VecWidth == 128 && EltWidth == 32)
4107 IID = Intrinsic::x86_avx512_pternlog_d_128;
4108 else if (VecWidth == 256 && EltWidth == 32)
4109 IID = Intrinsic::x86_avx512_pternlog_d_256;
4110 else if (VecWidth == 512 && EltWidth == 32)
4111 IID = Intrinsic::x86_avx512_pternlog_d_512;
4112 else if (VecWidth == 128 && EltWidth == 64)
4113 IID = Intrinsic::x86_avx512_pternlog_q_128;
4114 else if (VecWidth == 256 && EltWidth == 64)
4115 IID = Intrinsic::x86_avx512_pternlog_q_256;
4116 else if (VecWidth == 512 && EltWidth == 64)
4117 IID = Intrinsic::x86_avx512_pternlog_q_512;
4118 else
4119 llvm_unreachable("Unexpected intrinsic");
4120
4121 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4122 CI->getArgOperand(2), CI->getArgOperand(3)};
4123 Rep = Builder.CreateIntrinsic(IID, Args);
4124 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4125 : CI->getArgOperand(0);
4126 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4127 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4128 Name.starts_with("avx512.maskz.vpmadd52")) {
4129 bool ZeroMask = Name[11] == 'z';
4130 bool High = Name[20] == 'h' || Name[21] == 'h';
4131 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4132 Intrinsic::ID IID;
4133 if (VecWidth == 128 && !High)
4134 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4135 else if (VecWidth == 256 && !High)
4136 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4137 else if (VecWidth == 512 && !High)
4138 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4139 else if (VecWidth == 128 && High)
4140 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4141 else if (VecWidth == 256 && High)
4142 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4143 else if (VecWidth == 512 && High)
4144 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4145 else
4146 llvm_unreachable("Unexpected intrinsic");
4147
4148 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4149 CI->getArgOperand(2)};
4150 Rep = Builder.CreateIntrinsic(IID, Args);
4151 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4152 : CI->getArgOperand(0);
4153 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4154 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4155 Name.starts_with("avx512.mask.vpermt2var.") ||
4156 Name.starts_with("avx512.maskz.vpermt2var.")) {
4157 bool ZeroMask = Name[11] == 'z';
4158 bool IndexForm = Name[17] == 'i';
4159 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4160 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4161 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4162 Name.starts_with("avx512.mask.vpdpbusds.") ||
4163 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4164 bool ZeroMask = Name[11] == 'z';
4165 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4166 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4167 Intrinsic::ID IID;
4168 if (VecWidth == 128 && !IsSaturating)
4169 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4170 else if (VecWidth == 256 && !IsSaturating)
4171 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4172 else if (VecWidth == 512 && !IsSaturating)
4173 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4174 else if (VecWidth == 128 && IsSaturating)
4175 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4176 else if (VecWidth == 256 && IsSaturating)
4177 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4178 else if (VecWidth == 512 && IsSaturating)
4179 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4180 else
4181 llvm_unreachable("Unexpected intrinsic");
4182
4183 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4184 CI->getArgOperand(2)};
4185
4186 // Input argument types were incorrectly set to vectors of i32 before, but
4187 // they should be vectors of i8. Insert bitcasts when encountering the old
4188 // types.
4189 if (Args[1]->getType()->isVectorTy() &&
4190 cast<VectorType>(Args[1]->getType())
4191 ->getElementType()
4192 ->isIntegerTy(32) &&
4193 Args[2]->getType()->isVectorTy() &&
4194 cast<VectorType>(Args[2]->getType())
4195 ->getElementType()
4196 ->isIntegerTy(32)) {
4197 Type *NewArgType = nullptr;
4198 if (VecWidth == 128)
4199 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4200 else if (VecWidth == 256)
4201 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4202 else if (VecWidth == 512)
4203 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4204 else
4205 llvm_unreachable("Unexpected vector bit width");
4206
4207 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4208 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4209 }
4210
4211 Rep = Builder.CreateIntrinsic(IID, Args);
4212 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4213 : CI->getArgOperand(0);
4214 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4215 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4216 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4217 Name.starts_with("avx512.mask.vpdpwssds.") ||
4218 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4219 bool ZeroMask = Name[11] == 'z';
4220 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4221 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4222 Intrinsic::ID IID;
4223 if (VecWidth == 128 && !IsSaturating)
4224 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4225 else if (VecWidth == 256 && !IsSaturating)
4226 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4227 else if (VecWidth == 512 && !IsSaturating)
4228 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4229 else if (VecWidth == 128 && IsSaturating)
4230 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4231 else if (VecWidth == 256 && IsSaturating)
4232 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4233 else if (VecWidth == 512 && IsSaturating)
4234 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4235 else
4236 llvm_unreachable("Unexpected intrinsic");
4237
4238 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4239 CI->getArgOperand(2)};
4240 Rep = Builder.CreateIntrinsic(IID, Args);
4241 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4242 : CI->getArgOperand(0);
4243 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4244 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4245 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4246 Name == "subborrow.u32" || Name == "subborrow.u64") {
4247 Intrinsic::ID IID;
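// The last character selects the width: "...u32" ends in '2' and
// "...u64" ends in '4'.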
4248 if (Name[0] == 'a' && Name.back() == '2')
4249 IID = Intrinsic::x86_addcarry_32;
4250 else if (Name[0] == 'a' && Name.back() == '4')
4251 IID = Intrinsic::x86_addcarry_64;
4252 else if (Name[0] == 's' && Name.back() == '2')
4253 IID = Intrinsic::x86_subborrow_32;
4254 else if (Name[0] == 's' && Name.back() == '4')
4255 IID = Intrinsic::x86_subborrow_64;
4256 else
4257 llvm_unreachable("Unexpected intrinsic");
4258
4259 // Make a call with 3 operands.
4260 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4261 CI->getArgOperand(2)};
4262 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4263
4264 // Extract the second result and store it.
4265 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4266 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4267 // Replace the original call result with the first result of the new call.
4268 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4269
4270 CI->replaceAllUsesWith(CF);
4271 Rep = nullptr;
4272 } else if (Name.starts_with("avx512.mask.") &&
4273 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4274 // Rep will be updated by the call in the condition.
4275 }
4276
4277 return Rep;
4278}
4279
4280 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4281 Function *F, IRBuilder<> &Builder) {
4282 if (Name.starts_with("neon.bfcvt")) {
4283 if (Name.starts_with("neon.bfcvtn2")) {
4284 SmallVector<int, 32> LoMask(4);
4285 std::iota(LoMask.begin(), LoMask.end(), 0);
4286 SmallVector<int, 32> ConcatMask(8);
4287 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4288 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4289 Value *Trunc =
4290 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4291 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4292 } else if (Name.starts_with("neon.bfcvtn")) {
4293 SmallVector<int, 32> ConcatMask(8);
4294 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4295 Type *V4BF16 =
4296 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4297 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4299 return Builder.CreateShuffleVector(
4300 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4301 } else {
4302 return Builder.CreateFPTrunc(CI->getOperand(0),
4303 Type::getBFloatTy(F->getContext()));
4304 }
4305 } else if (Name.starts_with("sve.fcvt")) {
4306 Intrinsic::ID NewID =
4307 StringSwitch<Intrinsic::ID>(Name)
4308 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4309 .Case("sve.fcvtnt.bf16f32",
4310 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4311 .Default(Intrinsic::not_intrinsic);
4312 if (NewID == Intrinsic::not_intrinsic)
4313 llvm_unreachable("Unhandled Intrinsic!");
4314
4315 SmallVector<Value *, 3> Args(CI->args());
4316
4317 // The original intrinsics incorrectly used a predicate based on the
4318 // smallest element type rather than the largest.
4319 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4320 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4321
4322 if (Args[1]->getType() != BadPredTy)
4323 llvm_unreachable("Unexpected predicate type!");
4324
4325 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4326 BadPredTy, Args[1]);
4327 Args[1] = Builder.CreateIntrinsic(
4328 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4329
4330 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4331 CI->getName());
4332 }
4333
4334 llvm_unreachable("Unhandled Intrinsic!");
4335}
4336
4337 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4338 IRBuilder<> &Builder) {
4339 if (Name == "mve.vctp64.old") {
4340 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4341 // correct type.
4342 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4343 CI->getArgOperand(0),
4344 /*FMFSource=*/nullptr, CI->getName());
4345 Value *C1 = Builder.CreateIntrinsic(
4346 Intrinsic::arm_mve_pred_v2i,
4347 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4348 return Builder.CreateIntrinsic(
4349 Intrinsic::arm_mve_pred_i2v,
4350 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4351 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4352 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4353 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4354 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4355 Name ==
4356 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4357 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4358 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4359 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4360 Name ==
4361 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4362 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4363 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4364 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4365 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4366 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4367 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4368 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4369 std::vector<Type *> Tys;
4370 unsigned ID = CI->getIntrinsicID();
4371 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4372 switch (ID) {
4373 case Intrinsic::arm_mve_mull_int_predicated:
4374 case Intrinsic::arm_mve_vqdmull_predicated:
4375 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4376 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4377 break;
4378 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4379 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4380 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4381 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4382 V2I1Ty};
4383 break;
4384 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4385 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4386 CI->getOperand(1)->getType(), V2I1Ty};
4387 break;
4388 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4389 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4390 CI->getOperand(2)->getType(), V2I1Ty};
4391 break;
4392 case Intrinsic::arm_cde_vcx1q_predicated:
4393 case Intrinsic::arm_cde_vcx1qa_predicated:
4394 case Intrinsic::arm_cde_vcx2q_predicated:
4395 case Intrinsic::arm_cde_vcx2qa_predicated:
4396 case Intrinsic::arm_cde_vcx3q_predicated:
4397 case Intrinsic::arm_cde_vcx3qa_predicated:
4398 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4399 break;
4400 default:
4401 llvm_unreachable("Unhandled Intrinsic!");
4402 }
4403
4404 std::vector<Value *> Ops;
4405 for (Value *Op : CI->args()) {
4406 Type *Ty = Op->getType();
4407 if (Ty->getScalarSizeInBits() == 1) {
4408 Value *C1 = Builder.CreateIntrinsic(
4409 Intrinsic::arm_mve_pred_v2i,
4410 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4411 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4412 }
4413 Ops.push_back(Op);
4414 }
4415
4416 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4417 CI->getName());
4418 }
4419 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4420}
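// Illustrative sketch of the vctp64 path above (mangled names approximate):
// the old intrinsic
//   %p = call <4 x i1> @llvm.arm.mve.vctp64.old(i32 %n)
// is rebuilt roughly as
//   %v = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %v)
//   %p = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)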
4421
4422// These are expected to have the arguments:
4423// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4424//
4425// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4426//
4427static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4428 Function *F, IRBuilder<> &Builder) {
4429 AtomicRMWInst::BinOp RMWOp =
4430 StringSwitch<AtomicRMWInst::BinOp>(Name)
4431 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4432 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4433 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4434 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4435 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4436 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4437 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4438 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4439 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4440 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4441 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4442
4443 unsigned NumOperands = CI->getNumOperands();
4444 if (NumOperands < 3) // Malformed bitcode.
4445 return nullptr;
4446
4447 Value *Ptr = CI->getArgOperand(0);
4448 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4449 if (!PtrTy) // Malformed.
4450 return nullptr;
4451
4452 Value *Val = CI->getArgOperand(1);
4453 if (Val->getType() != CI->getType()) // Malformed.
4454 return nullptr;
4455
4456 ConstantInt *OrderArg = nullptr;
4457 bool IsVolatile = false;
4458
4459 // These should have 5 arguments (plus the callee). A separate version of the
4460 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4461 if (NumOperands > 3)
4462 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4463
4464 // Ignore scope argument at 3
4465
4466 if (NumOperands > 5) {
4467 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4468 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4469 }
4470
4471 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4472 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4473 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4474 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4475 Order = AtomicOrdering::SequentiallyConsistent;
4476
4477 LLVMContext &Ctx = F->getContext();
4478
4479 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4480 Type *RetTy = CI->getType();
4481 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4482 if (VT->getElementType()->isIntegerTy(16)) {
4483 VectorType *AsBF16 =
4484 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4485 Val = Builder.CreateBitCast(Val, AsBF16);
4486 }
4487 }
4488
4489 // The scope argument never really worked correctly. Use agent as the most
4490 // conservative option which should still always produce the instruction.
4491 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4492 AtomicRMWInst *RMW =
4493 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4494
4495 unsigned AddrSpace = PtrTy->getAddressSpace();
4496 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4497 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4498 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4499 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4500 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4501 }
4502
4503 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4504 MDBuilder MDB(F->getContext());
4505 MDNode *RangeNotPrivate =
4506 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4507 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4508 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4509 }
4510
4511 if (IsVolatile)
4512 RMW->setVolatile(true);
4513
4514 return Builder.CreateBitCast(RMW, RetTy);
4515}
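// Illustrative sketch (intrinsic spelling assumed): an old-style call such as
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// becomes a plain atomic instruction:
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst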
4516
4517/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4518/// plain MDNode, as it's the verifier's job to check these are the correct
4519/// types later.
4520static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4521 if (Op < CI->arg_size()) {
4522 if (MetadataAsValue *MAV =
4523 dyn_cast_or_null<MetadataAsValue>(CI->getArgOperand(Op))) {
4524 Metadata *MD = MAV->getMetadata();
4525 return dyn_cast_if_present<MDNode>(MD);
4526 }
4527 }
4528 return nullptr;
4529}
4530
4531/// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as
4532/// the Value field.
4532static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4533 if (Op < CI->arg_size())
4534 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4535 return MAV->getMetadata();
4536 return nullptr;
4537}
4538
4539static MDNode *getDebugLocSafe(const Instruction *I) {
4540 // The MDNode attached to this instruction might not be the correct type,
4541 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4542 return I->getDebugLoc().getAsMDNode();
4543}
4544
4545/// Convert debug intrinsic calls to non-instruction debug records.
4546/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4547/// \p CI - The debug intrinsic call.
4548static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4549 DbgRecord *DR = nullptr;
4550 if (Name == "label") {
4551 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4552 CI->getDebugLoc());
4553 } else if (Name == "assign") {
4554 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4555 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4556 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4557 unwrapMAVMetadataOp(CI, 4),
4558 /* The address is a Value ref; it will be stored as Metadata. */
4559 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4560 } else if (Name == "declare") {
4561 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4562 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4563 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4564 getDebugLocSafe(CI));
4565 } else if (Name == "addr") {
4566 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4567 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4568 // Don't try to add something to the expression if it's not an expression.
4569 // Instead, allow the verifier to fail later.
4570 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4571 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4572 }
4573 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4574 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4575 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4576 getDebugLocSafe(CI));
4577 } else if (Name == "value") {
4578 // An old version of dbg.value had an extra offset argument.
4579 unsigned VarOp = 1;
4580 unsigned ExprOp = 2;
4581 if (CI->arg_size() == 4) {
4582 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4583 // Nonzero offset dbg.values get dropped without a replacement.
4584 if (!Offset || !Offset->isZeroValue())
4585 return;
4586 VarOp = 2;
4587 ExprOp = 3;
4588 }
4589 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4590 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4591 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4592 nullptr, getDebugLocSafe(CI));
4593 }
4594 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4595 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4596}
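// Illustrative sketch (record syntax approximate): a debug intrinsic such as
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
//                             metadata !DIExpression()), !dbg !20
// is replaced by the equivalent non-instruction record
//   #dbg_value(i32 %x, !10, !DIExpression(), !20)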
4597
4598/// Upgrade a call to an old intrinsic. All argument and return casting must be
4599/// provided to seamlessly integrate with existing context.
4600void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4601 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4602 // checks that the callee's function type matches. It's likely we need to
4603 // handle type changes here.
4604 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4605 if (!F)
4606 return;
4607
4608 LLVMContext &C = CI->getContext();
4609 IRBuilder<> Builder(C);
4610 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4611
4612 if (!NewFn) {
4613 // Get the Function's name.
4614 StringRef Name = F->getName();
4615
4616 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4617 Name = Name.substr(5);
4618
4619 bool IsX86 = Name.consume_front("x86.");
4620 bool IsNVVM = Name.consume_front("nvvm.");
4621 bool IsAArch64 = Name.consume_front("aarch64.");
4622 bool IsARM = Name.consume_front("arm.");
4623 bool IsAMDGCN = Name.consume_front("amdgcn.");
4624 bool IsDbg = Name.consume_front("dbg.");
4625 Value *Rep = nullptr;
4626
4627 if (!IsX86 && Name == "stackprotectorcheck") {
4628 Rep = nullptr;
4629 } else if (IsNVVM) {
4630 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4631 } else if (IsX86) {
4632 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4633 } else if (IsAArch64) {
4634 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4635 } else if (IsARM) {
4636 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4637 } else if (IsAMDGCN) {
4638 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4639 } else if (IsDbg) {
4640 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4641 } else {
4642 llvm_unreachable("Unknown function for CallBase upgrade.");
4643 }
4644
4645 if (Rep)
4646 CI->replaceAllUsesWith(Rep);
4647 CI->eraseFromParent();
4648 return;
4649 }
4650
4651 const auto &DefaultCase = [&]() -> void {
4652 if (F == NewFn)
4653 return;
4654
4655 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4656 // Handle generic mangling change.
4657 assert(
4658 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4659 "Unknown function for CallBase upgrade and isn't just a name change");
4660 CI->setCalledFunction(NewFn);
4661 return;
4662 }
4663
4664 // This must be an upgrade from a named to a literal struct.
4665 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4666 assert(OldST != NewFn->getReturnType() &&
4667 "Return type must have changed");
4668 assert(OldST->getNumElements() ==
4669 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4670 "Must have same number of elements");
4671
4672 SmallVector<Value *> Args(CI->args());
4673 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4674 NewCI->setAttributes(CI->getAttributes());
4675 Value *Res = PoisonValue::get(OldST);
4676 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4677 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4678 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4679 }
4680 CI->replaceAllUsesWith(Res);
4681 CI->eraseFromParent();
4682 return;
4683 }
4684
4685 // We're probably about to produce something invalid. Let the verifier catch
4686 // it instead of dying here.
4687 CI->setCalledOperand(
4688 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4689 return;
4690 };
4691 CallInst *NewCall = nullptr;
4692 switch (NewFn->getIntrinsicID()) {
4693 default: {
4694 DefaultCase();
4695 return;
4696 }
4697 case Intrinsic::arm_neon_vst1:
4698 case Intrinsic::arm_neon_vst2:
4699 case Intrinsic::arm_neon_vst3:
4700 case Intrinsic::arm_neon_vst4:
4701 case Intrinsic::arm_neon_vst2lane:
4702 case Intrinsic::arm_neon_vst3lane:
4703 case Intrinsic::arm_neon_vst4lane: {
4704 SmallVector<Value *, 4> Args(CI->args());
4705 NewCall = Builder.CreateCall(NewFn, Args);
4706 break;
4707 }
4708 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4709 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4710 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4711 LLVMContext &Ctx = F->getParent()->getContext();
4712 SmallVector<Value *, 4> Args(CI->args());
4713 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4714 cast<ConstantInt>(Args[3])->getZExtValue());
4715 NewCall = Builder.CreateCall(NewFn, Args);
4716 break;
4717 }
4718 case Intrinsic::aarch64_sve_ld3_sret:
4719 case Intrinsic::aarch64_sve_ld4_sret:
4720 case Intrinsic::aarch64_sve_ld2_sret: {
4721 StringRef Name = F->getName();
4722 Name = Name.substr(5);
4723 unsigned N = StringSwitch<unsigned>(Name)
4724 .StartsWith("aarch64.sve.ld2", 2)
4725 .StartsWith("aarch64.sve.ld3", 3)
4726 .StartsWith("aarch64.sve.ld4", 4)
4727 .Default(0);
4728 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4729 unsigned MinElts = RetTy->getMinNumElements() / N;
4730 SmallVector<Value *, 2> Args(CI->args());
4731 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4732 Value *Ret = llvm::PoisonValue::get(RetTy);
4733 for (unsigned I = 0; I < N; I++) {
4734 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4735 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4736 }
4737 NewCall = dyn_cast<CallInst>(Ret);
4738 break;
4739 }
4740
4741 case Intrinsic::coro_end: {
4742 SmallVector<Value *, 3> Args(CI->args());
4743 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4744 NewCall = Builder.CreateCall(NewFn, Args);
4745 break;
4746 }
4747
4748 case Intrinsic::vector_extract: {
4749 StringRef Name = F->getName();
4750 Name = Name.substr(5); // Strip llvm
4751 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4752 DefaultCase();
4753 return;
4754 }
4755 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4756 unsigned MinElts = RetTy->getMinNumElements();
4757 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4758 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4759 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4760 break;
4761 }
4762
4763 case Intrinsic::vector_insert: {
4764 StringRef Name = F->getName();
4765 Name = Name.substr(5);
4766 if (!Name.starts_with("aarch64.sve.tuple")) {
4767 DefaultCase();
4768 return;
4769 }
4770 if (Name.starts_with("aarch64.sve.tuple.set")) {
4771 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4772 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4773 Value *NewIdx =
4774 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4775 NewCall = Builder.CreateCall(
4776 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4777 break;
4778 }
4779 if (Name.starts_with("aarch64.sve.tuple.create")) {
4780 unsigned N = StringSwitch<unsigned>(Name)
4781 .StartsWith("aarch64.sve.tuple.create2", 2)
4782 .StartsWith("aarch64.sve.tuple.create3", 3)
4783 .StartsWith("aarch64.sve.tuple.create4", 4)
4784 .Default(0);
4785 assert(N > 1 && "Create is expected to be between 2-4");
4786 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4787 Value *Ret = llvm::PoisonValue::get(RetTy);
4788 unsigned MinElts = RetTy->getMinNumElements() / N;
4789 for (unsigned I = 0; I < N; I++) {
4790 Value *V = CI->getArgOperand(I);
4791 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4792 }
4793 NewCall = dyn_cast<CallInst>(Ret);
4794 }
4795 break;
4796 }
4797
4798 case Intrinsic::arm_neon_bfdot:
4799 case Intrinsic::arm_neon_bfmmla:
4800 case Intrinsic::arm_neon_bfmlalb:
4801 case Intrinsic::arm_neon_bfmlalt:
4802 case Intrinsic::aarch64_neon_bfdot:
4803 case Intrinsic::aarch64_neon_bfmmla:
4804 case Intrinsic::aarch64_neon_bfmlalb:
4805 case Intrinsic::aarch64_neon_bfmlalt: {
4806 SmallVector<Value *, 3> Args;
4807 assert(CI->arg_size() == 3 &&
4808 "Mismatch between function args and call args");
4809 size_t OperandWidth =
4810 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4811 assert((OperandWidth == 64 || OperandWidth == 128) &&
4812 "Unexpected operand width");
4813 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4814 auto Iter = CI->args().begin();
4815 Args.push_back(*Iter++);
4816 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4817 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4818 NewCall = Builder.CreateCall(NewFn, Args);
4819 break;
4820 }
4821
4822 case Intrinsic::bitreverse:
4823 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4824 break;
4825
4826 case Intrinsic::ctlz:
4827 case Intrinsic::cttz:
4828 assert(CI->arg_size() == 1 &&
4829 "Mismatch between function args and call args");
4830 NewCall =
4831 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4832 break;
4833
4834 case Intrinsic::objectsize: {
4835 Value *NullIsUnknownSize =
4836 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4837 Value *Dynamic =
4838 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4839 NewCall = Builder.CreateCall(
4840 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4841 break;
4842 }
4843
4844 case Intrinsic::ctpop:
4845 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4846 break;
4847
4848 case Intrinsic::convert_from_fp16:
4849 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4850 break;
4851
4852 case Intrinsic::dbg_value: {
4853 StringRef Name = F->getName();
4854 Name = Name.substr(5); // Strip llvm.
4855 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4856 if (Name.starts_with("dbg.addr")) {
4857 DIExpression *Expr = cast<DIExpression>(
4858 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4859 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4860 NewCall =
4861 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4862 MetadataAsValue::get(C, Expr)});
4863 break;
4864 }
4865
4866 // Upgrade from the old version that had an extra offset argument.
4867 assert(CI->arg_size() == 4);
4868 // Drop nonzero offsets instead of attempting to upgrade them.
4869 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4870 if (Offset->isZeroValue()) {
4871 NewCall = Builder.CreateCall(
4872 NewFn,
4873 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4874 break;
4875 }
4876 CI->eraseFromParent();
4877 return;
4878 }
4879
4880 case Intrinsic::ptr_annotation:
4881 // Upgrade from versions that lacked the annotation attribute argument.
4882 if (CI->arg_size() != 4) {
4883 DefaultCase();
4884 return;
4885 }
4886
4887 // Create a new call with an added null annotation attribute argument.
4888 NewCall = Builder.CreateCall(
4889 NewFn,
4890 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4891 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4892 NewCall->takeName(CI);
4893 CI->replaceAllUsesWith(NewCall);
4894 CI->eraseFromParent();
4895 return;
4896
4897 case Intrinsic::var_annotation:
4898 // Upgrade from versions that lacked the annotation attribute argument.
4899 if (CI->arg_size() != 4) {
4900 DefaultCase();
4901 return;
4902 }
4903 // Create a new call with an added null annotation attribute argument.
4904 NewCall = Builder.CreateCall(
4905 NewFn,
4906 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4907 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4908 NewCall->takeName(CI);
4909 CI->replaceAllUsesWith(NewCall);
4910 CI->eraseFromParent();
4911 return;
4912
4913 case Intrinsic::riscv_aes32dsi:
4914 case Intrinsic::riscv_aes32dsmi:
4915 case Intrinsic::riscv_aes32esi:
4916 case Intrinsic::riscv_aes32esmi:
4917 case Intrinsic::riscv_sm4ks:
4918 case Intrinsic::riscv_sm4ed: {
4919 // The last argument to these intrinsics used to be i8 and changed to i32.
4920 // The type overload for sm4ks and sm4ed was removed.
4921 Value *Arg2 = CI->getArgOperand(2);
4922 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4923 return;
4924
4925 Value *Arg0 = CI->getArgOperand(0);
4926 Value *Arg1 = CI->getArgOperand(1);
4927 if (CI->getType()->isIntegerTy(64)) {
4928 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4929 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4930 }
4931
4932 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4933 cast<ConstantInt>(Arg2)->getZExtValue());
4934
4935 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4936 Value *Res = NewCall;
4937 if (Res->getType() != CI->getType())
4938 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4939 NewCall->takeName(CI);
4940 CI->replaceAllUsesWith(Res);
4941 CI->eraseFromParent();
4942 return;
4943 }
4944 case Intrinsic::nvvm_mapa_shared_cluster: {
4945 // Create a new call with the correct address space.
4946 NewCall =
4947 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
4948 Value *Res = NewCall;
4949 Res = Builder.CreateAddrSpaceCast(
4950 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
4951 NewCall->takeName(CI);
4952 CI->replaceAllUsesWith(Res);
4953 CI->eraseFromParent();
4954 return;
4955 }
4956 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4957 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4958 // Create a new call with the correct address space.
4959 SmallVector<Value *, 4> Args(CI->args());
4960 Args[0] = Builder.CreateAddrSpaceCast(
4961 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4962
4963 NewCall = Builder.CreateCall(NewFn, Args);
4964 NewCall->takeName(CI);
4965 CI->replaceAllUsesWith(NewCall);
4966 CI->eraseFromParent();
4967 return;
4968 }
4969 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4970 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4971 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4972 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4973 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4974 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4975 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4976 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4977 SmallVector<Value *, 16> Args(CI->args());
4978
4979 // Create AddrSpaceCast to shared_cluster if needed.
4980 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
4981 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
4982 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
4983 Args[0] = Builder.CreateAddrSpaceCast(
4984 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4985
4986 // Attach the flag argument for cta_group, with a
4987 // default value of 0. This handles case (2) in
4988 // shouldUpgradeNVPTXTMAG2SIntrinsics().
4989 size_t NumArgs = CI->arg_size();
4990 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
4991 if (!FlagArg->getType()->isIntegerTy(1))
4992 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
4993
4994 NewCall = Builder.CreateCall(NewFn, Args);
4995 NewCall->takeName(CI);
4996 CI->replaceAllUsesWith(NewCall);
4997 CI->eraseFromParent();
4998 return;
4999 }
5000 case Intrinsic::riscv_sha256sig0:
5001 case Intrinsic::riscv_sha256sig1:
5002 case Intrinsic::riscv_sha256sum0:
5003 case Intrinsic::riscv_sha256sum1:
5004 case Intrinsic::riscv_sm3p0:
5005 case Intrinsic::riscv_sm3p1: {
5006 // These intrinsics used to have an i64 type overload on RV64. That
5007 // overload was removed; truncate the argument and sign-extend the result.
5008 if (!CI->getType()->isIntegerTy(64))
5009 return;
5010
5011 Value *Arg =
5012 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5013
5014 NewCall = Builder.CreateCall(NewFn, Arg);
5015 Value *Res =
5016 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5017 NewCall->takeName(CI);
5018 CI->replaceAllUsesWith(Res);
5019 CI->eraseFromParent();
5020 return;
5021 }
5022
5023 case Intrinsic::x86_xop_vfrcz_ss:
5024 case Intrinsic::x86_xop_vfrcz_sd:
5025 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5026 break;
5027
5028 case Intrinsic::x86_xop_vpermil2pd:
5029 case Intrinsic::x86_xop_vpermil2ps:
5030 case Intrinsic::x86_xop_vpermil2pd_256:
5031 case Intrinsic::x86_xop_vpermil2ps_256: {
5032 SmallVector<Value *, 4> Args(CI->args());
5033 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5034 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5035 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5036 NewCall = Builder.CreateCall(NewFn, Args);
5037 break;
5038 }
5039
5040 case Intrinsic::x86_sse41_ptestc:
5041 case Intrinsic::x86_sse41_ptestz:
5042 case Intrinsic::x86_sse41_ptestnzc: {
5043 // The arguments for these intrinsics used to be v4f32, and changed
5044 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5045 // So, the only thing required is a bitcast for both arguments.
5046 // First, check the arguments have the old type.
5047 Value *Arg0 = CI->getArgOperand(0);
5048 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5049 return;
5050
5051 // Old intrinsic, add bitcasts
5052 Value *Arg1 = CI->getArgOperand(1);
5053
5054 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5055
5056 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5057 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5058
5059 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5060 break;
5061 }
5062
5063 case Intrinsic::x86_rdtscp: {
5064 // This used to take one argument. If we have no arguments, it is already
5065 // upgraded.
5066 if (CI->getNumOperands() == 0)
5067 return;
5068
5069 NewCall = Builder.CreateCall(NewFn);
5070 // Extract the second result and store it.
5071 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5072 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5073 // Replace the original call result with the first result of the new call.
5074 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5075
5076 NewCall->takeName(CI);
5077 CI->replaceAllUsesWith(TSC);
5078 CI->eraseFromParent();
5079 return;
5080 }
5081
5082 case Intrinsic::x86_sse41_insertps:
5083 case Intrinsic::x86_sse41_dppd:
5084 case Intrinsic::x86_sse41_dpps:
5085 case Intrinsic::x86_sse41_mpsadbw:
5086 case Intrinsic::x86_avx_dp_ps_256:
5087 case Intrinsic::x86_avx2_mpsadbw: {
5088 // Need to truncate the last argument from i32 to i8 -- this argument models
5089 // an inherently 8-bit immediate operand to these x86 instructions.
5090 SmallVector<Value *, 4> Args(CI->args());
5091
5092 // Replace the last argument with a trunc.
5093 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5094 NewCall = Builder.CreateCall(NewFn, Args);
5095 break;
5096 }
5097
5098 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5099 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5100 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5101 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5102 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5103 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5104 SmallVector<Value *, 4> Args(CI->args());
5105 unsigned NumElts =
5106 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5107 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5108
5109 NewCall = Builder.CreateCall(NewFn, Args);
5110 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5111
5112 NewCall->takeName(CI);
5113 CI->replaceAllUsesWith(Res);
5114 CI->eraseFromParent();
5115 return;
5116 }
5117
5118 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5119 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5120 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5121 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5122 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5123 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5124 SmallVector<Value *, 4> Args(CI->args());
5125 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5126 if (NewFn->getIntrinsicID() ==
5127 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5128 Args[1] = Builder.CreateBitCast(
5129 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5130
5131 NewCall = Builder.CreateCall(NewFn, Args);
5132 Value *Res = Builder.CreateBitCast(
5133 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5134
5135 NewCall->takeName(CI);
5136 CI->replaceAllUsesWith(Res);
5137 CI->eraseFromParent();
5138 return;
5139 }
5140 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5141 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5142 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5143 SmallVector<Value *, 4> Args(CI->args());
5144 unsigned NumElts =
5145 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5146 Args[1] = Builder.CreateBitCast(
5147 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5148 Args[2] = Builder.CreateBitCast(
5149 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5150
5151 NewCall = Builder.CreateCall(NewFn, Args);
5152 break;
5153 }
5154
5155 case Intrinsic::thread_pointer: {
5156 NewCall = Builder.CreateCall(NewFn, {});
5157 break;
5158 }
5159
5160 case Intrinsic::memcpy:
5161 case Intrinsic::memmove:
5162 case Intrinsic::memset: {
5163 // We have to make sure that the call signature is what we're expecting.
5164 // We only want to change the old signatures by removing the alignment arg:
5165 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5166 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5167 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5168 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5169 // Note: i8*'s in the above can be any pointer type
5170 if (CI->arg_size() != 5) {
5171 DefaultCase();
5172 return;
5173 }
5174 // Remove alignment argument (3), and add alignment attributes to the
5175 // dest/src pointers.
5176 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5177 CI->getArgOperand(2), CI->getArgOperand(4)};
5178 NewCall = Builder.CreateCall(NewFn, Args);
5179 AttributeList OldAttrs = CI->getAttributes();
5180 AttributeList NewAttrs = AttributeList::get(
5181 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5182 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5183 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5184 NewCall->setAttributes(NewAttrs);
5185 auto *MemCI = cast<MemIntrinsic>(NewCall);
5186 // All mem intrinsics support dest alignment.
5187 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5188 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5189 // Memcpy/Memmove also support source alignment.
5190 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5191 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5192 break;
5193 }
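// Illustrative sketch of the rewrite above: the old five-argument form
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
// becomes the four-argument form, with the alignment carried as attributes:
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s, i64 %n, i1 false)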
5194
5195 case Intrinsic::lifetime_start:
5196 case Intrinsic::lifetime_end: {
5197 if (CI->arg_size() != 2) {
5198 DefaultCase();
5199 return;
5200 }
5201
5202 Value *Ptr = CI->getArgOperand(1);
5203 // Try to strip pointer casts, such that the lifetime works on an alloca.
5204 Ptr = Ptr->stripPointerCasts();
5205 if (isa<AllocaInst>(Ptr)) {
5206 // Don't use NewFn, as we might have looked through an addrspacecast.
5207 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5208 NewCall = Builder.CreateLifetimeStart(Ptr);
5209 else
5210 NewCall = Builder.CreateLifetimeEnd(Ptr);
5211 break;
5212 }
5213
5214 // Otherwise remove the lifetime marker.
5215 CI->eraseFromParent();
5216 return;
5217 }
5218
5219 case Intrinsic::x86_avx512_vpdpbusd_128:
5220 case Intrinsic::x86_avx512_vpdpbusd_256:
5221 case Intrinsic::x86_avx512_vpdpbusd_512:
5222 case Intrinsic::x86_avx512_vpdpbusds_128:
5223 case Intrinsic::x86_avx512_vpdpbusds_256:
5224 case Intrinsic::x86_avx512_vpdpbusds_512: {
5225 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5226 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5227 CI->getArgOperand(2)};
5228 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5229 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5230 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5231
5232 NewCall = Builder.CreateCall(NewFn, Args);
5233 break;
5234 }
5235 }
5236 assert(NewCall && "Should have either set this variable or returned through "
5237 "the default case");
5238 NewCall->takeName(CI);
5239 CI->replaceAllUsesWith(NewCall);
5240 CI->eraseFromParent();
5241}
5242
5243void llvm::UpgradeCallsToIntrinsic(Function *F) {
5244 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5245
5246 // Check if this function should be upgraded and get the replacement function
5247 // if there is one.
5248 Function *NewFn;
5249 if (UpgradeIntrinsicFunction(F, NewFn)) {
5250 // Replace all users of the old function with the new function or new
5251 // instructions. This is not a range loop because the call is deleted.
5252 for (User *U : make_early_inc_range(F->users()))
5253 if (CallBase *CB = dyn_cast<CallBase>(U))
5254 UpgradeIntrinsicCall(CB, NewFn);
5255
5256 // Remove old function, no longer used, from the module.
5257 if (F != NewFn)
5258 F->eraseFromParent();
5259 }
5260}
5261
5262MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5263 const unsigned NumOperands = MD.getNumOperands();
5264 if (NumOperands == 0)
5265 return &MD; // Invalid, punt to a verifier error.
5266
5267 // Check if the tag uses struct-path aware TBAA format.
5268 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5269 return &MD;
5270
5271 auto &Context = MD.getContext();
5272 if (NumOperands == 3) {
5273 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5274 MDNode *ScalarType = MDNode::get(Context, Elts);
5275 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5276 Metadata *Elts2[] = {ScalarType, ScalarType,
5277 ConstantAsMetadata::get(
5278 Constant::getNullValue(Type::getInt64Ty(Context))),
5279 MD.getOperand(2)};
5280 return MDNode::get(Context, Elts2);
5281 }
5282 // Create a MDNode <MD, MD, offset 0>
5283 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5284 Type::getInt64Ty(Context)))};
5285 return MDNode::get(Context, Elts);
5286}
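// Illustrative sketch: an old scalar TBAA tag such as
//   !0 = !{!"int", !1}
// is wrapped into the struct-path form <node, node, offset 0>:
//   !2 = !{!0, !0, i64 0}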
5287
5288Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5289 Instruction *&Temp) {
5290 if (Opc != Instruction::BitCast)
5291 return nullptr;
5292
5293 Temp = nullptr;
5294 Type *SrcTy = V->getType();
5295 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5296 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5297 LLVMContext &Context = V->getContext();
5298
5299 // We have no information about target data layout, so we assume that
5300 // the maximum pointer size is 64bit.
5301 Type *MidTy = Type::getInt64Ty(Context);
5302 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5303
5304 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5305 }
5306
5307 return nullptr;
5308}
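// Illustrative sketch for this function and the constant-expression variant
// below: a cross-address-space
//   %q = bitcast ptr addrspace(1) %p to ptr
// is rebuilt as the pair
//   %t = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %t to ptr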
5309
5310Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5311 if (Opc != Instruction::BitCast)
5312 return nullptr;
5313
5314 Type *SrcTy = C->getType();
5315 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5316 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5317 LLVMContext &Context = C->getContext();
5318
5319 // We have no information about target data layout, so we assume that
5320 // the maximum pointer size is 64bit.
5321 Type *MidTy = Type::getInt64Ty(Context);
5322 Type *MidTy = Type::getInt64Ty(Context);
5323 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5324 DestTy);
5325 }
5326
5327 return nullptr;
5328}
5329
5330/// Check the debug info version number, if it is out-dated, drop the debug
5331/// info. Return true if module is modified.
5332bool llvm::UpgradeDebugInfo(Module &M) {
5333 if (DisableAutoUpgradeDebugInfo)
5334 return false;
5335
5336 llvm::TimeTraceScope timeScope("Upgrade debug info");
5337 // We need to get metadata before the module is verified (i.e., getModuleFlag
5338 // makes assumptions that we haven't verified yet). Carefully extract the flag
5339 // from the metadata.
5340 unsigned Version = 0;
5341 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5342 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5343 if (Flag->getNumOperands() < 3)
5344 return false;
5345 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5346 return K->getString() == "Debug Info Version";
5347 return false;
5348 });
5349 if (OpIt != ModFlags->op_end()) {
5350 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5351 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5352 Version = CI->getZExtValue();
5353 }
5354 }
5355
5356 if (Version == DEBUG_METADATA_VERSION) {
5357 bool BrokenDebugInfo = false;
5358 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5359 report_fatal_error("Broken module found, compilation aborted!");
5360 if (!BrokenDebugInfo)
5361 // Everything is ok.
5362 return false;
5363 else {
5364 // Diagnose malformed debug info.
5365 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5366 M.getContext().diagnose(Diag);
5367 }
5368 }
5369 bool Modified = StripDebugInfo(M);
5370 if (Modified && Version != DEBUG_METADATA_VERSION) {
5371 // Diagnose a version mismatch.
5372 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5373 M.getContext().diagnose(DiagVersion);
5374 }
5375 return Modified;
5376}
5377
5378static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5379 GlobalValue *GV, const Metadata *V) {
5380 Function *F = cast<Function>(GV);
5381
5382 constexpr StringLiteral DefaultValue = "1";
5383 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5384 unsigned Length = 0;
5385
5386 if (F->hasFnAttribute(Attr)) {
5387 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5388 // parse these elements, placing them into Vect3.
5389 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5390 for (; Length < 3 && !S.empty(); Length++) {
5391 auto [Part, Rest] = S.split(',');
5392 Vect3[Length] = Part.trim();
5393 S = Rest;
5394 }
5395 }
5396
5397 const unsigned Dim = DimC - 'x';
5398 assert(Dim < 3 && "Unexpected dim char");
5399
5400 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5401
5402 // Local variable required for the StringRef in Vect3 to point to.
5403 const std::string VStr = llvm::utostr(VInt);
5404 Vect3[Dim] = VStr;
5405 Length = std::max(Length, Dim + 1);
5406
5407 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5408 F->addFnAttr(Attr, NewAttr);
5409}
5410
5411static inline bool isXYZ(StringRef S) {
5412 return S == "x" || S == "y" || S == "z";
5413}
5414
5415static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5416 const Metadata *V) {
5417 if (K == "kernel") {
5418 if (!mdconst::extract<ConstantInt>(V)->isZero())
5419 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5420 return true;
5421 }
5422 if (K == "align") {
5423 // V is a bitfield specifying two 16-bit values. The alignment value is
5424 // specified in the low 16 bits, and the index is specified in the high
5425 // bits. For the index, 0 indicates the return value while higher values
5426 // correspond to each parameter (idx = param + 1).
5427 const uint64_t AlignIdxValuePair =
5428 mdconst::extract<ConstantInt>(V)->getZExtValue();
5429 const unsigned Idx = (AlignIdxValuePair >> 16);
5430 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5431 cast<Function>(GV)->addAttributeAtIndex(
5432 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5433 return true;
5434 }
5435 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5436 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5437 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5438 return true;
5439 }
5440 if (K == "minctasm") {
5441 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5442 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5443 return true;
5444 }
5445 if (K == "maxnreg") {
5446 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5447 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5448 return true;
5449 }
5450 if (K.consume_front("maxntid") && isXYZ(K)) {
5451 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5452 return true;
5453 }
5454 if (K.consume_front("reqntid") && isXYZ(K)) {
5455 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5456 return true;
5457 }
5458 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5459 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5460 return true;
5461 }
5462 if (K == "grid_constant") {
5463 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5464 for (const auto &Op : cast<MDNode>(V)->operands()) {
5465 // For some reason, the index is 1-based in the metadata. Good thing we're
5466 // able to auto-upgrade it!
5467 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5468 cast<Function>(GV)->addParamAttr(Index, Attr);
5469 }
5470 return true;
5471 }
5472
5473 return false;
5474}
5475
5476void llvm::UpgradeNVVMAnnotations(Module &M) {
5477 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5478 if (!NamedMD)
5479 return;
5480
5481 SmallVector<MDNode *, 8> NewNodes;
5482 SmallPtrSet<const MDNode *, 8> SeenNodes;
5483 for (MDNode *MD : NamedMD->operands()) {
5484 if (!SeenNodes.insert(MD).second)
5485 continue;
5486
5487 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5488 if (!GV)
5489 continue;
5490
5491 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5492
5493 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5494 // Each nvvm.annotations metadata entry will be of the following form:
5495 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5496 // start index = 1, to skip the global variable key
5497 // increment = 2, to skip the value for each property-value pair
5498 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5499 MDString *K = cast<MDString>(MD->getOperand(j));
5500 const MDOperand &V = MD->getOperand(j + 1);
5501 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5502 if (!Upgraded)
5503 NewOperands.append({K, V});
5504 }
5505
5506 if (NewOperands.size() > 1)
5507 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5508 }
5509
5510 NamedMD->clearOperands();
5511 for (MDNode *N : NewNodes)
5512 NamedMD->addOperand(N);
5513}
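// Illustrative sketch: an annotation entry such as
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @foo, !"kernel", i32 1, !"maxntidx", i32 64}
// upgrades @foo to the ptx_kernel calling convention, adds the function
// attribute "nvvm.maxntid"="64", and drops both pairs from the node.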
5514
5515/// This checks for the objc retain/release marker, which should be upgraded.
5516/// It returns true if the module is modified.
5517static bool upgradeRetainReleaseMarker(Module &M) {
5518 bool Changed = false;
5519 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5520 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5521 if (ModRetainReleaseMarker) {
5522 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5523 if (Op) {
5524 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5525 if (ID) {
5526 SmallVector<StringRef, 4> ValueComp;
5527 ID->getString().split(ValueComp, "#");
5528 if (ValueComp.size() == 2) {
5529 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5530 ID = MDString::get(M.getContext(), NewValue);
5531 }
5532 M.addModuleFlag(Module::Error, MarkerKey, ID);
5533 M.eraseNamedMetadata(ModRetainReleaseMarker);
5534 Changed = true;
5535 }
5536 }
5537 }
5538 return Changed;
5539}
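// Illustrative sketch: a marker such as
//   !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
//   !0 = !{!"mov r7, r7 # marker"}
// is moved into a module flag with the '#' separator rewritten to ';':
//   "mov r7, r7 ; marker"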
5540
5541void llvm::UpgradeARCRuntime(Module &M) {
5542 // This lambda converts normal calls to ARC runtime functions into
5543 // intrinsic calls.
5544 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5545 llvm::Intrinsic::ID IntrinsicFunc) {
5546 Function *Fn = M.getFunction(OldFunc);
5547
5548 if (!Fn)
5549 return;
5550
5551 Function *NewFn =
5552 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5553
5554 for (User *U : make_early_inc_range(Fn->users())) {
5555 CallInst *CI = dyn_cast<CallInst>(U);
5556 if (!CI || CI->getCalledFunction() != Fn)
5557 continue;
5558
5559 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5560 FunctionType *NewFuncTy = NewFn->getFunctionType();
5561 SmallVector<Value *, 2> Args;
5562
5563 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5564 // value to the return type of the old function.
5565 if (NewFuncTy->getReturnType() != CI->getType() &&
5566 !CastInst::castIsValid(Instruction::BitCast, CI,
5567 NewFuncTy->getReturnType()))
5568 continue;
5569
5570 bool InvalidCast = false;
5571
5572 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5573 Value *Arg = CI->getArgOperand(I);
5574
5575 // Bitcast argument to the parameter type of the new function if it's
5576 // not a variadic argument.
5577 if (I < NewFuncTy->getNumParams()) {
5578 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5579 // to the parameter type of the new function.
5580 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5581 NewFuncTy->getParamType(I))) {
5582 InvalidCast = true;
5583 break;
5584 }
5585 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5586 }
5587 Args.push_back(Arg);
5588 }
5589
5590 if (InvalidCast)
5591 continue;
5592
5593 // Create a call instruction that calls the new function.
5594 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5595 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5596 NewCall->takeName(CI);
5597
5598 // Bitcast the return value back to the type of the old call.
5599 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5600
5601 if (!CI->use_empty())
5602 CI->replaceAllUsesWith(NewRetVal);
5603 CI->eraseFromParent();
5604 }
5605
5606 if (Fn->use_empty())
5607 Fn->eraseFromParent();
5608 };
5609
5610 // Unconditionally convert a call to "clang.arc.use" to a call to
5611 // "llvm.objc.clang.arc.use".
5612 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5613
5614 // Upgrade the retain release marker. If there is no need to upgrade
5615 // the marker, that means either the module is already new enough to contain
5616 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
5617 if (!upgradeRetainReleaseMarker(M))
5618 return;
5619
5620 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5621 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5622 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5623 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5624 {"objc_autoreleaseReturnValue",
5625 llvm::Intrinsic::objc_autoreleaseReturnValue},
5626 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5627 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5628 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5629 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5630 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5631 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5632 {"objc_release", llvm::Intrinsic::objc_release},
5633 {"objc_retain", llvm::Intrinsic::objc_retain},
5634 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5635 {"objc_retainAutoreleaseReturnValue",
5636 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5637 {"objc_retainAutoreleasedReturnValue",
5638 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5639 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5640 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5641 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5642 {"objc_unsafeClaimAutoreleasedReturnValue",
5643 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5644 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5645 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5646 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5647 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5648 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5649 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5650 {"objc_arc_annotation_topdown_bbstart",
5651 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5652 {"objc_arc_annotation_topdown_bbend",
5653 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5654 {"objc_arc_annotation_bottomup_bbstart",
5655 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5656 {"objc_arc_annotation_bottomup_bbend",
5657 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5658
5659 for (auto &I : RuntimeFuncs)
5660 UpgradeToIntrinsic(I.first, I.second);
5661}
5662
5663bool llvm::UpgradeModuleFlags(Module &M) {
5664 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5665 if (!ModFlags)
5666 return false;
5667
5668 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5669 bool HasSwiftVersionFlag = false;
5670 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5671 uint32_t SwiftABIVersion;
5672 auto Int8Ty = Type::getInt8Ty(M.getContext());
5673 auto Int32Ty = Type::getInt32Ty(M.getContext());
5674
5675 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5676 MDNode *Op = ModFlags->getOperand(I);
5677 if (Op->getNumOperands() != 3)
5678 continue;
5679 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5680 if (!ID)
5681 continue;
5682 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5683 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5684 Type::getInt32Ty(M.getContext()), B)),
5685 MDString::get(M.getContext(), ID->getString()),
5686 Op->getOperand(2)};
5687 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5688 Changed = true;
5689 };
5690
5691 if (ID->getString() == "Objective-C Image Info Version")
5692 HasObjCFlag = true;
5693 if (ID->getString() == "Objective-C Class Properties")
5694 HasClassProperties = true;
5695 // Upgrade PIC from Error/Max to Min.
5696 if (ID->getString() == "PIC Level") {
5697 if (auto *Behavior =
5698 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5699 uint64_t V = Behavior->getLimitedValue();
5700 if (V == Module::Error || V == Module::Max)
5701 SetBehavior(Module::Min);
5702 }
5703 }
5704 // Upgrade "PIE Level" from Error to Max.
5705 if (ID->getString() == "PIE Level")
5706 if (auto *Behavior =
5707 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5708 if (Behavior->getLimitedValue() == Module::Error)
5709 SetBehavior(Module::Max);
5710
5711 // Upgrade branch protection and return address signing module flags. The
5712 // module flag behavior for these fields was Error and is now Min.
5713 if (ID->getString() == "branch-target-enforcement" ||
5714 ID->getString().starts_with("sign-return-address")) {
5715 if (auto *Behavior =
5717 if (Behavior->getLimitedValue() == Module::Error) {
5718 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5719 Metadata *Ops[3] = {
5720 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5721 Op->getOperand(1), Op->getOperand(2)};
5722 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5723 Changed = true;
5724 }
5725 }
5726 }
5727
5728 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5729 // section name so that llvm-lto will not complain about mismatching
5730 // module flags that are functionally the same.
5731 if (ID->getString() == "Objective-C Image Info Section") {
5732 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5733 SmallVector<StringRef, 4> ValueComp;
5734 Value->getString().split(ValueComp, " ");
5735 if (ValueComp.size() != 1) {
5736 std::string NewValue;
5737 for (auto &S : ValueComp)
5738 NewValue += S.str();
5739 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5740 MDString::get(M.getContext(), NewValue)};
5741 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5742 Changed = true;
5743 }
5744 }
5745 }
5746
5747 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
5748 // value. If the higher bits are set, it adds a new module flag for Swift info.
5749 if (ID->getString() == "Objective-C Garbage Collection") {
5750 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5751 if (Md) {
5752 assert(Md->getValue() && "Expected non-empty metadata");
5753 auto Type = Md->getValue()->getType();
5754 if (Type == Int8Ty)
5755 continue;
5756 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5757 if ((Val & 0xff) != Val) {
5758 HasSwiftVersionFlag = true;
5759 SwiftABIVersion = (Val & 0xff00) >> 8;
5760 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5761 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5762 }
5763 Metadata *Ops[3] = {
5764 Op->getOperand(0),
5765 Op->getOperand(1),
5766 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5767 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5768 Changed = true;
5769 }
5770 }
5771
5772 if (ID->getString() == "amdgpu_code_object_version") {
5773 Metadata *Ops[3] = {
5774 Op->getOperand(0),
5775 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5776 Op->getOperand(2)};
5777 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5778 Changed = true;
5779 }
5780 }
5781
5782 // "Objective-C Class Properties" is recently added for Objective-C. We
5783 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
5784 // flag of value 0, so we can correctly downgrade this flag when trying to
5785 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5786 // this module flag.
5787 if (HasObjCFlag && !HasClassProperties) {
5788 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5789 (uint32_t)0);
5790 Changed = true;
5791 }
5792
5793 if (HasSwiftVersionFlag) {
5794 M.addModuleFlag(Module::Error, "Swift ABI Version",
5795 SwiftABIVersion);
5796 M.addModuleFlag(Module::Error, "Swift Major Version",
5797 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5798 M.addModuleFlag(Module::Error, "Swift Minor Version",
5799 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5800 Changed = true;
5801 }
5802
5803 return Changed;
5804}
5805
5806void llvm::UpgradeSectionAttributes(Module &M) {
5807 auto TrimSpaces = [](StringRef Section) -> std::string {
5808 SmallVector<StringRef, 5> Components;
5809 Section.split(Components, ',');
5810
5811 SmallString<32> Buffer;
5812 raw_svector_ostream OS(Buffer);
5813
5814 for (auto Component : Components)
5815 OS << ',' << Component.trim();
5816
5817 return std::string(OS.str().substr(1));
5818 };
5819
5820 for (auto &GV : M.globals()) {
5821 if (!GV.hasSection())
5822 continue;
5823
5824 StringRef Section = GV.getSection();
5825
5826 if (!Section.starts_with("__DATA, __objc_catlist"))
5827 continue;
5828
5829 // __DATA, __objc_catlist, regular, no_dead_strip
5830 // __DATA,__objc_catlist,regular,no_dead_strip
5831 GV.setSection(TrimSpaces(Section));
5832 }
5833}
5834
5835namespace {
5836// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5837// callsites within a function that did not also have the strictfp attribute.
5838// Since 10.0, if strict FP semantics are needed within a function, the
5839// function must have the strictfp attribute and all calls within the function
5840// must also have the strictfp attribute. This latter restriction is
5841// necessary to prevent unwanted libcall simplification when a function is
5842// being cloned (such as for inlining).
5843//
5844// The "dangling" strictfp attribute usage was only used to prevent constant
5845// folding and other libcall simplification. The nobuiltin attribute on the
5846// callsite has the same effect.
5847struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5848 StrictFPUpgradeVisitor() = default;
5849
5850 void visitCallBase(CallBase &Call) {
5851 if (!Call.isStrictFP())
5852 return;
5853 if (isa<ConstrainedFPIntrinsic>(&Call))
5854 return;
5855 // If we get here, the caller doesn't have the strictfp attribute
5856 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5857 Call.removeFnAttr(Attribute::StrictFP);
5858 Call.addFnAttr(Attribute::NoBuiltin);
5859 }
5860};
5861
5862/// Replace the "amdgpu-unsafe-fp-atomics" function attribute with atomicrmw
5863/// metadata
5863struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5864 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5865 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5866
5867 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5868 if (!RMW.isFloatingPointOperation())
5869 return;
5870
5871 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5872 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5873 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5874 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5875 }
5876};
5877} // namespace
5878
5879void llvm::UpgradeFunctionAttributes(Function &F) {
5880 // If a function definition doesn't have the strictfp attribute,
5881 // convert any callsite strictfp attributes to nobuiltin.
5882 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5883 StrictFPUpgradeVisitor SFPV;
5884 SFPV.visit(F);
5885 }
5886
5887 // Remove all incompatible attributes from the function.
5888 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5889 F.getReturnType(), F.getAttributes().getRetAttrs()));
5890 for (auto &Arg : F.args())
5891 Arg.removeAttrs(
5892 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5893
5894 // Older versions of LLVM treated an "implicit-section-name" attribute
5895 // similarly to directly setting the section on a Function.
5896 if (Attribute A = F.getFnAttribute("implicit-section-name");
5897 A.isValid() && A.isStringAttribute()) {
5898 F.setSection(A.getValueAsString());
5899 F.removeFnAttr("implicit-section-name");
5900 }
5901
5902 if (!F.empty()) {
5903 // For some reason this is called twice, and the first time is before any
5904 // instructions are loaded into the body.
5905
5906 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5907 A.isValid()) {
5908
5909 if (A.getValueAsBool()) {
5910 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5911 Visitor.visit(F);
5912 }
5913
5914 // We will leave behind dead attribute uses on external declarations, but
5915 // clang never added these to declarations anyway.
5916 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5917 }
5918 }
5919}
5920
5921static bool isOldLoopArgument(Metadata *MD) {
5922 auto *T = dyn_cast_or_null<MDTuple>(MD);
5923 if (!T)
5924 return false;
5925 if (T->getNumOperands() < 1)
5926 return false;
5927 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5928 if (!S)
5929 return false;
5930 return S->getString().starts_with("llvm.vectorizer.");
5931}
5932
5933static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5934 StringRef OldPrefix = "llvm.vectorizer.";
5935 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5936
5937 if (OldTag == "llvm.vectorizer.unroll")
5938 return MDString::get(C, "llvm.loop.interleave.count");
5939
5940 return MDString::get(
5941 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5942 .str());
5943}
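// Illustrative mappings produced by upgradeLoopTag above (sketch):
//   !{!"llvm.vectorizer.unroll", i32 4} --> !{!"llvm.loop.interleave.count", i32 4}
//   !{!"llvm.vectorizer.width", i32 8}  --> !{!"llvm.loop.vectorize.width", i32 8}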
5944
5945static Metadata *upgradeLoopArgument(Metadata *MD) {
5946 auto *T = dyn_cast_or_null<MDTuple>(MD);
5947 if (!T)
5948 return MD;
5949 if (T->getNumOperands() < 1)
5950 return MD;
5951 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5952 if (!OldTag)
5953 return MD;
5954 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5955 return MD;
5956
5957 // This has an old tag. Upgrade it.
5958 SmallVector<Metadata *, 8> Ops;
5959 Ops.reserve(T->getNumOperands());
5960 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5961 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5962 Ops.push_back(T->getOperand(I));
5963
5964 return MDTuple::get(T->getContext(), Ops);
5965}
5966
5967MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5968 auto *T = dyn_cast<MDTuple>(&N);
5969 if (!T)
5970 return &N;
5971
5972 if (none_of(T->operands(), isOldLoopArgument))
5973 return &N;
5974
5975 SmallVector<Metadata *, 8> Ops;
5976 Ops.reserve(T->getNumOperands());
5977 for (Metadata *MD : T->operands())
5978 Ops.push_back(upgradeLoopArgument(MD));
5979
5980 return MDTuple::get(T->getContext(), Ops);
5981}
5982
5983std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5984 Triple T(TT);
5985 // The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV targets
5986 // is setting the address space of globals to 1; SPIRV Logical is exempt.
5987 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5988 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5989 !DL.contains("-G") && !DL.starts_with("G")) {
5990 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5991 }
5992
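// Illustrative example (sketch): for riscv64, a layout such as
//   e-m:e-p:64:64-i64:64-i128:128-n64-S128
// is upgraded to
//   e-m:e-p:64:64-i64:64-i128:128-n32:64-S128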
5993 if (T.isLoongArch64() || T.isRISCV64()) {
5994 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5995 auto I = DL.find("-n64-");
5996 if (I != StringRef::npos)
5997 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5998 return DL.str();
5999 }
6000
6001 std::string Res = DL.str();
6002 // AMDGCN data layout upgrades.
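// Illustrative example (sketch): an old AMDGCN layout lacking the "G", "ni",
// and p7/p8/p9 entries gains, in this order,
//   -G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32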
6003 if (T.isAMDGCN()) {
6004 // Define address spaces for constants.
6005 if (!DL.contains("-G") && !DL.starts_with("G"))
6006 Res.append(Res.empty() ? "G1" : "-G1");
6007
6008 // Add missing non-integral declarations.
6009 // This goes before adding new address spaces to prevent incoherent string
6010 // values.
6011 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6012 Res.append("-ni:7:8:9");
6013 // Update ni:7 to ni:7:8:9.
6014 if (DL.ends_with("ni:7"))
6015 Res.append(":8:9");
6016 if (DL.ends_with("ni:7:8"))
6017 Res.append(":9");
6018
6019 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6020 // resources). An empty data layout has already been upgraded to G1 by now.
6021 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6022 Res.append("-p7:160:256:256:32");
6023 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6024 Res.append("-p8:128:128:128:48");
6025 constexpr StringRef OldP8("-p8:128:128-");
6026 if (DL.contains(OldP8))
6027 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6028 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6029 Res.append("-p9:192:256:256:32");
6030
6031 return Res;
6032 }
6033
6034 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6035 // If the datalayout matches the expected format, add pointer size address
6036 // spaces to the datalayout.
6037 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6038 if (!DL.contains(AddrSpaces)) {
6039 SmallVector<StringRef, 4> Groups;
6040 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6041 if (R.match(Res, &Groups))
6042 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6043 }
6044 };
6045
6046 // AArch64 data layout upgrades.
6047 if (T.isAArch64()) {
6048 // Add "-Fn32"
6049 if (!DL.empty() && !DL.contains("-Fn32"))
6050 Res.append("-Fn32");
6051 AddPtr32Ptr64AddrSpaces();
6052 return Res;
6053 }
6054
6055 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6056 T.isWasm()) {
6057 // Mips64 with o32 ABI did not add "-i128:128".
6058 // Add "-i128:128"
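    // E.g. (illustrative): "...-i64:64-n32:64-S128" becomes
    // "...-i64:64-i128:128-n32:64-S128".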
6059 std::string I64 = "-i64:64";
6060 std::string I128 = "-i128:128";
6061 if (!StringRef(Res).contains(I128)) {
6062 size_t Pos = Res.find(I64);
6063 if (Pos != size_t(-1))
6064 Res.insert(Pos + I64.size(), I128);
6065 }
6066 return Res;
6067 }
6068
6069 if (!T.isX86())
6070 return Res;
6071
6072 AddPtr32Ptr64AddrSpaces();
6073
6074 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6075 // for i128 operations prior to this being reflected in the data layout, and
6076 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6077 // boundaries, so although this is a breaking change, the upgrade is expected
6078 // to fix more IR than it breaks.
6079 // Intel MCU is an exception and uses 4-byte-alignment.
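  // Illustrative example (sketch):
  //   e-m:e-i64:64-f80:128-n8:16:32:64-S128
  // becomes
  //   e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128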
6080 if (!T.isOSIAMCU()) {
6081 std::string I128 = "-i128:128";
6082 if (StringRef Ref = Res; !Ref.contains(I128)) {
6083 SmallVector<StringRef, 4> Groups;
6084 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6085 if (R.match(Res, &Groups))
6086 Res = (Groups[1] + I128 + Groups[3]).str();
6087 }
6088 }
6089
6090 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6091 // Raising the alignment is safe because Clang did not produce f80 values in
6092 // the MSVC environment before this upgrade was added.
6093 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6094 StringRef Ref = Res;
6095 auto I = Ref.find("-f80:32-");
6096 if (I != StringRef::npos)
6097 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6098 }
6099
6100 return Res;
6101}
6102
6103void llvm::UpgradeAttributes(AttrBuilder &B) {
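  // Illustrative mapping (sketch):
  //   "no-frame-pointer-elim"="true"           -> "frame-pointer"="all"
  //   "no-frame-pointer-elim"="false"          -> "frame-pointer"="none"
  //   "no-frame-pointer-elim-non-leaf" present -> "frame-pointer"="non-leaf"
  //     (unless "no-frame-pointer-elim"="true" is also present)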
6104 StringRef FramePointer;
6105 Attribute A = B.getAttribute("no-frame-pointer-elim");
6106 if (A.isValid()) {
6107 // The value can be "true" or "false".
6108 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6109 B.removeAttribute("no-frame-pointer-elim");
6110 }
6111 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6112 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6113 if (FramePointer != "all")
6114 FramePointer = "non-leaf";
6115 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6116 }
6117 if (!FramePointer.empty())
6118 B.addAttribute("frame-pointer", FramePointer);
6119
6120 A = B.getAttribute("null-pointer-is-valid");
6121 if (A.isValid()) {
6122 // The value can be "true" or "false".
6123 bool NullPointerIsValid = A.getValueAsString() == "true";
6124 B.removeAttribute("null-pointer-is-valid");
6125 if (NullPointerIsValid)
6126 B.addAttribute(Attribute::NullPointerIsValid);
6127 }
6128}
6129
6130void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6131 // clang.arc.attachedcall bundles are now required to have an operand.
6132 // If they don't, it's okay to drop them entirely: when there is an operand,
6133 // the "attachedcall" is meaningful and required, but without an operand,
6134 // it's just a marker NOP. Dropping it merely prevents an optimization.
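  // Illustrative example (sketch):
  //   call void @f() [ "clang.arc.attachedcall"() ]   ; bundle is dropped
  //   call void @f() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]   ; kept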
6135 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6136 return OBD.getTag() == "clang.arc.attachedcall" &&
6137 OBD.inputs().empty();
6138 });
6139}