//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
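//
// An illustrative sketch (not tied to any single case below) of what
// "auto-upgrading" means: old bitcode that calls a retired intrinsic such as
//
//   %r = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %v, i32 8)
//
// is rewritten on load into equivalent current IR (for this intrinsic, a
// byte shuffle; see upgradeX86PSLLDQIntrinsics below), so later passes only
// ever see the supported form.
//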

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
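// For example, rename() turns a stale declaration of "llvm.x86.sse41.ptestc"
// into "llvm.x86.sse41.ptestc.old", freeing the canonical name so a fresh
// declaration with the current signature can be created beside it.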

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
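// Illustrative before/after for the ptest upgrade above:
//
//   old: declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
//   new: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)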

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
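// For instance, llvm.x86.sse41.insertps (routed here below) once took its
// immediate as i32; the current declaration takes i8, so a trailing i32 is
// the signal that the declaration is stale:
//
//   old: declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>,
//                                                     <4 x float>, i32)
//   new: declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>,
//                                                     <4 x float>, i8)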

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
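// Sketch of the change detected here, using avx512.mask.cmp.ps.512 (one of
// the intrinsics routed to this helper below): the old declaration returned
// the comparison result as a scalar integer mask, while the current one
// returns a <16 x i1> mask vector.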

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
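// A note on how the predicate above is used: upgradeX86IntrinsicFunction
// (below) consults it first and, on a match, reports an upgrade with a null
// NewFn. A null NewFn means "no replacement declaration exists"; the call
// sites themselves are rewritten later, in UpgradeIntrinsicCall, into plain
// IR.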

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
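// Worked example for the XOP path above: a stale vpermil2pd declaration that
// still passes the selector operand as <2 x double> has EltSize == 64 and
// IdxSize == 128, so it is mapped to Intrinsic::x86_xop_vpermil2pd, whose
// current declaration takes the selector as an integer vector of the same
// width.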

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// iff an upgrade was performed.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
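// Illustrative bfdot case for the code above: a pre-12.0 declaration
//
//   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>,
//                                                           <8 x i8>, <8 x i8>)
//
// has a 64-bit return type, so OperandWidth / 16 == 4 and the replacement
// declaration is created over (<2 x float>, <4 x bfloat>, <4 x bfloat>).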

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So if the third-from-last parameter is not an i1, this is the older
    // version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
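// Sketch of case (2) above, with types abbreviated: an old tile.1d variant
// ends "..., i64 %ch, i1 %mc_flag, i1 %ch_flag", while the new one ends
// "..., i64 %ch, i1 %mc_flag, i1 %ch_flag, i32 %cta_group_flag". Probing the
// third parameter from the end therefore distinguishes the two forms without
// parsing the mangled name.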

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
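// For example, with Name == "shared.to.gen" this consumes "shared" and
// returns true, leaving Name == ".to.gen" for the caller's follow-up check
// (see the nvvm.ptr.* expansion logic below).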

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 't':
    if (Name == "thread.pointer") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                .Case("add.signed",
                      Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID()) {
    // Only do this if the intrinsic signature is valid.
    SmallVector<Type *> OverloadTys;
    if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
      F->setAttributes(
          Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
  }
  return Upgraded;
}
1659
1660GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1661 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1662 GV->getName() == "llvm.global_dtors")) ||
1663 !GV->hasInitializer())
1664 return nullptr;
1665 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1666 if (!ATy)
1667 return nullptr;
1668 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1669 if (!STy || STy->getNumElements() != 2)
1670 return nullptr;
1671
1672 LLVMContext &C = GV->getContext();
1673 IRBuilder<> IRB(C);
1674 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1675 IRB.getPtrTy());
1676 Constant *Init = GV->getInitializer();
1677 unsigned N = Init->getNumOperands();
1678 std::vector<Constant *> NewCtors(N);
1679 for (unsigned i = 0; i != N; ++i) {
1680 auto Ctor = cast<Constant>(Init->getOperand(i));
1681 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1682 Ctor->getAggregateElement(1),
1683 Constant::getNullValue(IRB.getPtrTy()));
1684 }
1685 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1686
1687 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1688 NewInit, GV->getName());
1689}
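// For example (illustrative): an llvm.global_ctors initializer of type
// [1 x { i32, ptr }] is rebuilt here as [1 x { i32, ptr, ptr }], with the new
// third (associated-data) field filled in as a null pointer.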
1690
1691// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1692// to byte shuffles.
1693static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1694 unsigned Shift) {
1695 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1696 unsigned NumElts = ResultTy->getNumElements() * 8;
1697
1698 // Bitcast from a 64-bit element type to a byte element type.
1699 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1700 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1701
1702 // We'll be shuffling in zeroes.
1703 Value *Res = Constant::getNullValue(VecTy);
1704
1705 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1706 // we'll just return the zero vector.
1707 if (Shift < 16) {
1708 int Idxs[64];
1709 // 256/512-bit version is split into 2/4 16-byte lanes.
1710 for (unsigned l = 0; l != NumElts; l += 16)
1711 for (unsigned i = 0; i != 16; ++i) {
1712 unsigned Idx = NumElts + i - Shift;
1713 if (Idx < NumElts)
1714 Idx -= NumElts - 16; // end of lane, switch operand.
1715 Idxs[l + i] = Idx + l;
1716 }
1717
1718 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1719 }
1720
1721 // Bitcast back to a 64-bit element type.
1722 return Builder.CreateBitCast(Res, ResultTy, "cast");
1723}
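// For example (illustrative, assuming the caller passes Shift in bytes): a
// 128-bit PSLLDQ with Shift == 8 bitcasts the operand to <16 x i8> and emits
//   shufflevector <16 x i8> zeroinitializer, <16 x i8> %op,
//                 <16 x i32> <i32 8, ..., i32 15, i32 16, ..., i32 23>
// i.e. eight zero bytes followed by the low eight bytes of %op, which is then
// bitcast back to <2 x i64>.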
1724
1725// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1726// to byte shuffles.
1727static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1728 unsigned Shift) {
1729 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1730 unsigned NumElts = ResultTy->getNumElements() * 8;
1731
1732 // Bitcast from a 64-bit element type to a byte element type.
1733 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1734 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1735
1736 // We'll be shuffling in zeroes.
1737 Value *Res = Constant::getNullValue(VecTy);
1738
1739 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1740 // we'll just return the zero vector.
1741 if (Shift < 16) {
1742 int Idxs[64];
1743 // 256/512-bit version is split into 2/4 16-byte lanes.
1744 for (unsigned l = 0; l != NumElts; l += 16)
1745 for (unsigned i = 0; i != 16; ++i) {
1746 unsigned Idx = i + Shift;
1747 if (Idx >= 16)
1748 Idx += NumElts - 16; // end of lane, switch operand.
1749 Idxs[l + i] = Idx + l;
1750 }
1751
1752 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1753 }
1754
1755 // Bitcast back to a 64-bit element type.
1756 return Builder.CreateBitCast(Res, ResultTy, "cast");
1757}
1758
1759static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1760 unsigned NumElts) {
1761 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1762 llvm::VectorType *MaskTy = FixedVectorType::get(
1763 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1764 Mask = Builder.CreateBitCast(Mask, MaskTy);
1765
1766 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1767 // i8 and we need to extract down to the right number of elements.
1768 if (NumElts <= 4) {
1769 int Indices[4];
1770 for (unsigned i = 0; i != NumElts; ++i)
1771 Indices[i] = i;
1772 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1773 "extract");
1774 }
1775
1776 return Mask;
1777}
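// For example (illustrative): an i8 mask feeding a 4-element operation is
// bitcast to <8 x i1> and then narrowed with
//   shufflevector <8 x i1> %m, <8 x i1> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// so only the low four mask bits are used.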
1778
1779static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1780 Value *Op1) {
1781 // If the mask is all ones just emit the first operation.
1782 if (const auto *C = dyn_cast<Constant>(Mask))
1783 if (C->isAllOnesValue())
1784 return Op0;
1785
1786 Mask = getX86MaskVec(Builder, Mask,
1787 cast<FixedVectorType>(Op0->getType())->getNumElements());
1788 return Builder.CreateSelect(Mask, Op0, Op1);
1789}
1790
1791static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1792 Value *Op1) {
1793 // If the mask is all ones just emit the first operation.
1794 if (const auto *C = dyn_cast<Constant>(Mask))
1795 if (C->isAllOnesValue())
1796 return Op0;
1797
1798 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1799 Mask->getType()->getIntegerBitWidth());
1800 Mask = Builder.CreateBitCast(Mask, MaskTy);
1801 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1802 return Builder.CreateSelect(Mask, Op0, Op1);
1803}
1804
1805// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1806 // PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
1807 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1808static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1809 Value *Op1, Value *Shift,
1810 Value *Passthru, Value *Mask,
1811 bool IsVALIGN) {
1812 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1813
1814 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1815 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1816 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1817 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1818
1819 // Mask the immediate for VALIGN.
1820 if (IsVALIGN)
1821 ShiftVal &= (NumElts - 1);
1822
1823 // If palignr is shifting the pair of vectors more than the size of two
1824 // lanes, emit zero.
1825 if (ShiftVal >= 32)
1826 return llvm::Constant::getNullValue(Op0->getType());
1827
1828 // If palignr is shifting the pair of input vectors more than one lane,
1829 // but less than two lanes, convert to shifting in zeroes.
1830 if (ShiftVal > 16) {
1831 ShiftVal -= 16;
1832 Op1 = Op0;
1833 Op0 = llvm::Constant::getNullValue(Op0->getType());
1834 }
1835
1836 int Indices[64];
1837 // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1838 for (unsigned l = 0; l < NumElts; l += 16) {
1839 for (unsigned i = 0; i != 16; ++i) {
1840 unsigned Idx = ShiftVal + i;
1841 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1842 Idx += NumElts - 16; // End of lane, switch operand.
1843 Indices[l + i] = Idx + l;
1844 }
1845 }
1846
1847 Value *Align = Builder.CreateShuffleVector(
1848 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1849
1850 return emitX86Select(Builder, Mask, Align, Passthru);
1851}
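// For example (illustrative): a 128-bit palignr with an immediate of 20 is
// rewritten above as ShiftVal == 4 with Op1 = Op0 and Op0 = zeroinitializer,
// so the shuffle yields bytes 4..15 of the original first operand followed by
// four zero bytes, matching the hardware's shift-in-zeroes behavior.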
1852
1853static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1854 bool ZeroMask, bool IndexForm) {
1855 Type *Ty = CI.getType();
1856 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1857 unsigned EltWidth = Ty->getScalarSizeInBits();
1858 bool IsFloat = Ty->isFPOrFPVectorTy();
1859 Intrinsic::ID IID;
1860 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1861 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1862 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1863 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1864 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1865 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1866 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1867 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1868 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1869 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1870 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1871 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1872 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1873 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1874 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1875 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1876 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1877 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1878 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1879 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1880 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1881 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1882 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1883 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1884 else if (VecWidth == 128 && EltWidth == 16)
1885 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1886 else if (VecWidth == 256 && EltWidth == 16)
1887 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1888 else if (VecWidth == 512 && EltWidth == 16)
1889 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1890 else if (VecWidth == 128 && EltWidth == 8)
1891 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1892 else if (VecWidth == 256 && EltWidth == 8)
1893 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1894 else if (VecWidth == 512 && EltWidth == 8)
1895 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1896 else
1897 llvm_unreachable("Unexpected intrinsic");
1898
1899 Value *Args[] = {CI.getArgOperand(0), CI.getArgOperand(1),
1900 CI.getArgOperand(2)};
1901
1902 // If this isn't index form, we need to swap operands 0 and 1.
1903 if (!IndexForm)
1904 std::swap(Args[0], Args[1]);
1905
1906 Value *V = Builder.CreateIntrinsic(IID, Args);
1907 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1908 : Builder.CreateBitCast(CI.getArgOperand(1),
1909 Ty);
1910 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1911}
1912
1913static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1914 Intrinsic::ID IID) {
1915 Type *Ty = CI.getType();
1916 Value *Op0 = CI.getOperand(0);
1917 Value *Op1 = CI.getOperand(1);
1918 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1919
1920 if (CI.arg_size() == 4) { // For masked intrinsics.
1921 Value *VecSrc = CI.getOperand(2);
1922 Value *Mask = CI.getOperand(3);
1923 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1924 }
1925 return Res;
1926}
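// For example (illustrative): "sse41.pmaxsb" is routed through this helper
// with IID == Intrinsic::smax (see the upgrade table further down), producing
//   %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// plus, for the masked AVX-512 variants, a select against the mask.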
1927
1928static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1929 bool IsRotateRight) {
1930 Type *Ty = CI.getType();
1931 Value *Src = CI.getArgOperand(0);
1932 Value *Amt = CI.getArgOperand(1);
1933
1934 // Amount may be a scalar immediate, in which case create a splat vector.
1935 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1936 // we only care about the lowest log2 bits anyway.
1937 if (Amt->getType() != Ty) {
1938 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1939 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1940 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1941 }
1942
1943 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1944 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1945
1946 if (CI.arg_size() == 4) { // For masked intrinsics.
1947 Value *VecSrc = CI.getOperand(2);
1948 Value *Mask = CI.getOperand(3);
1949 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1950 }
1951 return Res;
1952}
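// For example (illustrative): a left rotate such as
//   %r = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x, <4 x i32> %amt)
// becomes
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %amt)
// since a rotate is a funnel shift with both inputs equal.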
1953
1954static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1955 bool IsSigned) {
1956 Type *Ty = CI.getType();
1957 Value *LHS = CI.getArgOperand(0);
1958 Value *RHS = CI.getArgOperand(1);
1959
1960 CmpInst::Predicate Pred;
1961 switch (Imm) {
1962 case 0x0:
1963 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1964 break;
1965 case 0x1:
1966 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1967 break;
1968 case 0x2:
1969 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1970 break;
1971 case 0x3:
1972 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1973 break;
1974 case 0x4:
1975 Pred = ICmpInst::ICMP_EQ;
1976 break;
1977 case 0x5:
1978 Pred = ICmpInst::ICMP_NE;
1979 break;
1980 case 0x6:
1981 return Constant::getNullValue(Ty); // FALSE
1982 case 0x7:
1983 return Constant::getAllOnesValue(Ty); // TRUE
1984 default:
1985 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1986 }
1987
1988 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1989 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1990 return Ext;
1991}
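// For example (illustrative): Imm == 0x2 with IsSigned == true turns
//   %r = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a, <16 x i8> %b, i8 2)
// into an "icmp sgt" whose <16 x i1> result is sign-extended back to
// <16 x i8>, preserving XOP's all-ones/all-zeros lane semantics.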
1992
1993static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1994 bool IsShiftRight, bool ZeroMask) {
1995 Type *Ty = CI.getType();
1996 Value *Op0 = CI.getArgOperand(0);
1997 Value *Op1 = CI.getArgOperand(1);
1998 Value *Amt = CI.getArgOperand(2);
1999
2000 if (IsShiftRight)
2001 std::swap(Op0, Op1);
2002
2003 // Amount may be a scalar immediate, in which case create a splat vector.
2004 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2005 // we only care about the lowest log2 bits anyway.
2006 if (Amt->getType() != Ty) {
2007 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2008 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2009 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2010 }
2011
2012 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2013 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2014
2015 unsigned NumArgs = CI.arg_size();
2016 if (NumArgs >= 4) { // For masked intrinsics.
2017 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2018 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2019 CI.getArgOperand(0);
2020 Value *Mask = CI.getOperand(NumArgs - 1);
2021 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2022 }
2023 return Res;
2024}
2025
2026static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2027 Value *Mask, bool Aligned) {
2028 const Align Alignment =
2029 Aligned
2030 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2031 : Align(1);
2032
2033 // If the mask is all ones just emit a regular store.
2034 if (const auto *C = dyn_cast<Constant>(Mask))
2035 if (C->isAllOnesValue())
2036 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2037
2038 // Convert the mask from an integer type to a vector of i1.
2039 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2040 Mask = getX86MaskVec(Builder, Mask, NumElts);
2041 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2042}
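// For example (illustrative): with a non-constant i8 mask, a 128-bit aligned
// masked store is emitted as
//   call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %p, i32 16,
//                                         <4 x i1> %maskvec)
// while an all-ones constant mask short-circuits to a plain aligned store.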
2043
2044static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2045 Value *Passthru, Value *Mask, bool Aligned) {
2046 Type *ValTy = Passthru->getType();
2047 const Align Alignment =
2048 Aligned
2049 ? Align(
2050 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2051 8)
2052 : Align(1);
2053
2054 // If the mask is all ones just emit a regular load.
2055 if (const auto *C = dyn_cast<Constant>(Mask))
2056 if (C->isAllOnesValue())
2057 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2058
2059 // Convert the mask from an integer type to a vector of i1.
2060 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2061 Mask = getX86MaskVec(Builder, Mask, NumElts);
2062 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2063}
2064
2065static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2066 Type *Ty = CI.getType();
2067 Value *Op0 = CI.getArgOperand(0);
2068 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2069 {Op0, Builder.getInt1(false)});
2070 if (CI.arg_size() == 3)
2071 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2072 return Res;
2073}
2074
2075static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2076 Type *Ty = CI.getType();
2077
2078 // Arguments have a vXi32 type so cast to vXi64.
2079 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2080 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2081
2082 if (IsSigned) {
2083 // Shift left then arithmetic shift right.
2084 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2085 LHS = Builder.CreateShl(LHS, ShiftAmt);
2086 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2087 RHS = Builder.CreateShl(RHS, ShiftAmt);
2088 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2089 } else {
2090 // Clear the upper bits.
2091 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2092 LHS = Builder.CreateAnd(LHS, Mask);
2093 RHS = Builder.CreateAnd(RHS, Mask);
2094 }
2095
2096 Value *Res = Builder.CreateMul(LHS, RHS);
2097
2098 if (CI.arg_size() == 4)
2099 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2100
2101 return Res;
2102}
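// For example (illustrative): for the signed form on <2 x i64> operands, each
// lane is sign-extended from its low 32 bits first,
//   %lhs = ashr (shl %a, 32), 32
//   %rhs = ashr (shl %b, 32), 32
//   %res = mul %lhs, %rhs
// whereas the unsigned form just masks each lane with 0xffffffff.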
2103
2104 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2105static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2106 Value *Mask) {
2107 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2108 if (Mask) {
2109 const auto *C = dyn_cast<Constant>(Mask);
2110 if (!C || !C->isAllOnesValue())
2111 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2112 }
2113
2114 if (NumElts < 8) {
2115 int Indices[8];
2116 for (unsigned i = 0; i != NumElts; ++i)
2117 Indices[i] = i;
2118 for (unsigned i = NumElts; i != 8; ++i)
2119 Indices[i] = NumElts + i % NumElts;
2120 Vec = Builder.CreateShuffleVector(Vec,
2121 Constant::getNullValue(Vec->getType()),
2122 Indices);
2123 }
2124 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2125}
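// For example (illustrative): a <4 x i1> compare result is padded to
// <8 x i1> with elements taken from a null vector, then bitcast to i8, so the
// upper four mask bits read as zero.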
2126
2127static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2128 unsigned CC, bool Signed) {
2129 Value *Op0 = CI.getArgOperand(0);
2130 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2131
2132 Value *Cmp;
2133 if (CC == 3) {
2134 Cmp = Constant::getNullValue(
2135 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2136 } else if (CC == 7) {
2137 Cmp = Constant::getAllOnesValue(
2138 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2139 } else {
2140 ICmpInst::Predicate Pred;
2141 switch (CC) {
2142 default: llvm_unreachable("Unknown condition code");
2143 case 0: Pred = ICmpInst::ICMP_EQ; break;
2144 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2145 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2146 case 4: Pred = ICmpInst::ICMP_NE; break;
2147 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2148 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2149 }
2150 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2151 }
2152
2153 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2154
2155 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2156}
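// For example (illustrative): CC == 6 with Signed == true emits
//   %cmp = icmp sgt <8 x i32> %op0, %op1
// and the <8 x i1> result is combined with the call's trailing mask operand
// by applyX86MaskOn1BitsVec before being bitcast to an integer mask.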
2157
2158// Replace a masked intrinsic with an older unmasked intrinsic.
2159static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2160 Intrinsic::ID IID) {
2161 Value *Rep =
2162 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2163 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2164}
2165
2166static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2167 Value* A = CI.getArgOperand(0);
2168 Value* B = CI.getArgOperand(1);
2169 Value* Src = CI.getArgOperand(2);
2170 Value* Mask = CI.getArgOperand(3);
2171
2172 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2173 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2174 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2175 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2176 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2177 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2178}
2179
2180static Value* upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2181 Value* Op = CI.getArgOperand(0);
2182 Type* ReturnOp = CI.getType();
2183 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2184 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2185 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2186}
2187
2188// Replace intrinsic with unmasked version and a select.
2189static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2190 CallBase &CI, Value *&Rep) {
2191 Name = Name.substr(12); // Remove avx512.mask.
2192
2193 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2194 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2195 Intrinsic::ID IID;
2196 if (Name.starts_with("max.p")) {
2197 if (VecWidth == 128 && EltWidth == 32)
2198 IID = Intrinsic::x86_sse_max_ps;
2199 else if (VecWidth == 128 && EltWidth == 64)
2200 IID = Intrinsic::x86_sse2_max_pd;
2201 else if (VecWidth == 256 && EltWidth == 32)
2202 IID = Intrinsic::x86_avx_max_ps_256;
2203 else if (VecWidth == 256 && EltWidth == 64)
2204 IID = Intrinsic::x86_avx_max_pd_256;
2205 else
2206 llvm_unreachable("Unexpected intrinsic");
2207 } else if (Name.starts_with("min.p")) {
2208 if (VecWidth == 128 && EltWidth == 32)
2209 IID = Intrinsic::x86_sse_min_ps;
2210 else if (VecWidth == 128 && EltWidth == 64)
2211 IID = Intrinsic::x86_sse2_min_pd;
2212 else if (VecWidth == 256 && EltWidth == 32)
2213 IID = Intrinsic::x86_avx_min_ps_256;
2214 else if (VecWidth == 256 && EltWidth == 64)
2215 IID = Intrinsic::x86_avx_min_pd_256;
2216 else
2217 llvm_unreachable("Unexpected intrinsic");
2218 } else if (Name.starts_with("pshuf.b.")) {
2219 if (VecWidth == 128)
2220 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2221 else if (VecWidth == 256)
2222 IID = Intrinsic::x86_avx2_pshuf_b;
2223 else if (VecWidth == 512)
2224 IID = Intrinsic::x86_avx512_pshuf_b_512;
2225 else
2226 llvm_unreachable("Unexpected intrinsic");
2227 } else if (Name.starts_with("pmul.hr.sw.")) {
2228 if (VecWidth == 128)
2229 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2230 else if (VecWidth == 256)
2231 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2232 else if (VecWidth == 512)
2233 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2234 else
2235 llvm_unreachable("Unexpected intrinsic");
2236 } else if (Name.starts_with("pmulh.w.")) {
2237 if (VecWidth == 128)
2238 IID = Intrinsic::x86_sse2_pmulh_w;
2239 else if (VecWidth == 256)
2240 IID = Intrinsic::x86_avx2_pmulh_w;
2241 else if (VecWidth == 512)
2242 IID = Intrinsic::x86_avx512_pmulh_w_512;
2243 else
2244 llvm_unreachable("Unexpected intrinsic");
2245 } else if (Name.starts_with("pmulhu.w.")) {
2246 if (VecWidth == 128)
2247 IID = Intrinsic::x86_sse2_pmulhu_w;
2248 else if (VecWidth == 256)
2249 IID = Intrinsic::x86_avx2_pmulhu_w;
2250 else if (VecWidth == 512)
2251 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2252 else
2253 llvm_unreachable("Unexpected intrinsic");
2254 } else if (Name.starts_with("pmaddw.d.")) {
2255 if (VecWidth == 128)
2256 IID = Intrinsic::x86_sse2_pmadd_wd;
2257 else if (VecWidth == 256)
2258 IID = Intrinsic::x86_avx2_pmadd_wd;
2259 else if (VecWidth == 512)
2260 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2261 else
2262 llvm_unreachable("Unexpected intrinsic");
2263 } else if (Name.starts_with("pmaddubs.w.")) {
2264 if (VecWidth == 128)
2265 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2266 else if (VecWidth == 256)
2267 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2268 else if (VecWidth == 512)
2269 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2270 else
2271 llvm_unreachable("Unexpected intrinsic");
2272 } else if (Name.starts_with("packsswb.")) {
2273 if (VecWidth == 128)
2274 IID = Intrinsic::x86_sse2_packsswb_128;
2275 else if (VecWidth == 256)
2276 IID = Intrinsic::x86_avx2_packsswb;
2277 else if (VecWidth == 512)
2278 IID = Intrinsic::x86_avx512_packsswb_512;
2279 else
2280 llvm_unreachable("Unexpected intrinsic");
2281 } else if (Name.starts_with("packssdw.")) {
2282 if (VecWidth == 128)
2283 IID = Intrinsic::x86_sse2_packssdw_128;
2284 else if (VecWidth == 256)
2285 IID = Intrinsic::x86_avx2_packssdw;
2286 else if (VecWidth == 512)
2287 IID = Intrinsic::x86_avx512_packssdw_512;
2288 else
2289 llvm_unreachable("Unexpected intrinsic");
2290 } else if (Name.starts_with("packuswb.")) {
2291 if (VecWidth == 128)
2292 IID = Intrinsic::x86_sse2_packuswb_128;
2293 else if (VecWidth == 256)
2294 IID = Intrinsic::x86_avx2_packuswb;
2295 else if (VecWidth == 512)
2296 IID = Intrinsic::x86_avx512_packuswb_512;
2297 else
2298 llvm_unreachable("Unexpected intrinsic");
2299 } else if (Name.starts_with("packusdw.")) {
2300 if (VecWidth == 128)
2301 IID = Intrinsic::x86_sse41_packusdw;
2302 else if (VecWidth == 256)
2303 IID = Intrinsic::x86_avx2_packusdw;
2304 else if (VecWidth == 512)
2305 IID = Intrinsic::x86_avx512_packusdw_512;
2306 else
2307 llvm_unreachable("Unexpected intrinsic");
2308 } else if (Name.starts_with("vpermilvar.")) {
2309 if (VecWidth == 128 && EltWidth == 32)
2310 IID = Intrinsic::x86_avx_vpermilvar_ps;
2311 else if (VecWidth == 128 && EltWidth == 64)
2312 IID = Intrinsic::x86_avx_vpermilvar_pd;
2313 else if (VecWidth == 256 && EltWidth == 32)
2314 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2315 else if (VecWidth == 256 && EltWidth == 64)
2316 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2317 else if (VecWidth == 512 && EltWidth == 32)
2318 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2319 else if (VecWidth == 512 && EltWidth == 64)
2320 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2321 else
2322 llvm_unreachable("Unexpected intrinsic");
2323 } else if (Name == "cvtpd2dq.256") {
2324 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2325 } else if (Name == "cvtpd2ps.256") {
2326 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2327 } else if (Name == "cvttpd2dq.256") {
2328 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2329 } else if (Name == "cvttps2dq.128") {
2330 IID = Intrinsic::x86_sse2_cvttps2dq;
2331 } else if (Name == "cvttps2dq.256") {
2332 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2333 } else if (Name.starts_with("permvar.")) {
2334 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2335 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2336 IID = Intrinsic::x86_avx2_permps;
2337 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2338 IID = Intrinsic::x86_avx2_permd;
2339 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2340 IID = Intrinsic::x86_avx512_permvar_df_256;
2341 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2342 IID = Intrinsic::x86_avx512_permvar_di_256;
2343 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2344 IID = Intrinsic::x86_avx512_permvar_sf_512;
2345 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2346 IID = Intrinsic::x86_avx512_permvar_si_512;
2347 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2348 IID = Intrinsic::x86_avx512_permvar_df_512;
2349 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2350 IID = Intrinsic::x86_avx512_permvar_di_512;
2351 else if (VecWidth == 128 && EltWidth == 16)
2352 IID = Intrinsic::x86_avx512_permvar_hi_128;
2353 else if (VecWidth == 256 && EltWidth == 16)
2354 IID = Intrinsic::x86_avx512_permvar_hi_256;
2355 else if (VecWidth == 512 && EltWidth == 16)
2356 IID = Intrinsic::x86_avx512_permvar_hi_512;
2357 else if (VecWidth == 128 && EltWidth == 8)
2358 IID = Intrinsic::x86_avx512_permvar_qi_128;
2359 else if (VecWidth == 256 && EltWidth == 8)
2360 IID = Intrinsic::x86_avx512_permvar_qi_256;
2361 else if (VecWidth == 512 && EltWidth == 8)
2362 IID = Intrinsic::x86_avx512_permvar_qi_512;
2363 else
2364 llvm_unreachable("Unexpected intrinsic");
2365 } else if (Name.starts_with("dbpsadbw.")) {
2366 if (VecWidth == 128)
2367 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2368 else if (VecWidth == 256)
2369 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2370 else if (VecWidth == 512)
2371 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2372 else
2373 llvm_unreachable("Unexpected intrinsic");
2374 } else if (Name.starts_with("pmultishift.qb.")) {
2375 if (VecWidth == 128)
2376 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2377 else if (VecWidth == 256)
2378 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2379 else if (VecWidth == 512)
2380 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2381 else
2382 llvm_unreachable("Unexpected intrinsic");
2383 } else if (Name.starts_with("conflict.")) {
2384 if (Name[9] == 'd' && VecWidth == 128)
2385 IID = Intrinsic::x86_avx512_conflict_d_128;
2386 else if (Name[9] == 'd' && VecWidth == 256)
2387 IID = Intrinsic::x86_avx512_conflict_d_256;
2388 else if (Name[9] == 'd' && VecWidth == 512)
2389 IID = Intrinsic::x86_avx512_conflict_d_512;
2390 else if (Name[9] == 'q' && VecWidth == 128)
2391 IID = Intrinsic::x86_avx512_conflict_q_128;
2392 else if (Name[9] == 'q' && VecWidth == 256)
2393 IID = Intrinsic::x86_avx512_conflict_q_256;
2394 else if (Name[9] == 'q' && VecWidth == 512)
2395 IID = Intrinsic::x86_avx512_conflict_q_512;
2396 else
2397 llvm_unreachable("Unexpected intrinsic");
2398 } else if (Name.starts_with("pavg.")) {
2399 if (Name[5] == 'b' && VecWidth == 128)
2400 IID = Intrinsic::x86_sse2_pavg_b;
2401 else if (Name[5] == 'b' && VecWidth == 256)
2402 IID = Intrinsic::x86_avx2_pavg_b;
2403 else if (Name[5] == 'b' && VecWidth == 512)
2404 IID = Intrinsic::x86_avx512_pavg_b_512;
2405 else if (Name[5] == 'w' && VecWidth == 128)
2406 IID = Intrinsic::x86_sse2_pavg_w;
2407 else if (Name[5] == 'w' && VecWidth == 256)
2408 IID = Intrinsic::x86_avx2_pavg_w;
2409 else if (Name[5] == 'w' && VecWidth == 512)
2410 IID = Intrinsic::x86_avx512_pavg_w_512;
2411 else
2412 llvm_unreachable("Unexpected intrinsic");
2413 } else
2414 return false;
2415
2416 SmallVector<Value *, 4> Args(CI.args());
2417 Args.pop_back();
2418 Args.pop_back();
2419 Rep = Builder.CreateIntrinsic(IID, Args);
2420 unsigned NumArgs = CI.arg_size();
2421 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2422 CI.getArgOperand(NumArgs - 2));
2423 return true;
2424}
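// For example (illustrative): "avx512.mask.max.ps.128" resolves above to
// Intrinsic::x86_sse_max_ps; the trailing passthru and mask operands are
// popped off, the unmasked intrinsic is called, and emitX86Select reapplies
// the mask.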
2425
2426/// Upgrade comment in call to inline asm that represents an objc retain release
2427/// marker.
2428void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2429 size_t Pos;
2430 if (AsmStr->find("mov\tfp") == 0 &&
2431 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2432 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2433 AsmStr->replace(Pos, 1, ";");
2434 }
2435}
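// For example (illustrative): an asm string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has the "#" that introduces the marker comment rewritten to ";", the
// spelling expected for this retain/release marker today.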
2436
2437static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2438 Function *F, IRBuilder<> &Builder) {
2439 Value *Rep = nullptr;
2440
2441 if (Name == "abs.i" || Name == "abs.ll") {
2442 Value *Arg = CI->getArgOperand(0);
2443 Value *Neg = Builder.CreateNeg(Arg, "neg");
2444 Value *Cmp = Builder.CreateICmpSGE(
2445 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2446 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2447 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2448 Type *Ty = (Name == "abs.bf16")
2449 ? Builder.getBFloatTy()
2450 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2451 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2452 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2453 Rep = Builder.CreateBitCast(Abs, CI->getType());
2454 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2455 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2456 : Intrinsic::nvvm_fabs;
2457 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2458 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2459 Name.starts_with("atomic.load.add.f64.p")) {
2460 Value *Ptr = CI->getArgOperand(0);
2461 Value *Val = CI->getArgOperand(1);
2462 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2463 AtomicOrdering::SequentiallyConsistent);
2464 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2465 Name.starts_with("atomic.load.dec.32.p")) {
2466 Value *Ptr = CI->getArgOperand(0);
2467 Value *Val = CI->getArgOperand(1);
2468 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2470 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2471 AtomicOrdering::SequentiallyConsistent);
2472 } else if (Name.consume_front("max.") &&
2473 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2474 Name == "ui" || Name == "ull")) {
2475 Value *Arg0 = CI->getArgOperand(0);
2476 Value *Arg1 = CI->getArgOperand(1);
2477 Value *Cmp = Name.starts_with("u")
2478 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2479 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2480 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2481 } else if (Name.consume_front("min.") &&
2482 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2483 Name == "ui" || Name == "ull")) {
2484 Value *Arg0 = CI->getArgOperand(0);
2485 Value *Arg1 = CI->getArgOperand(1);
2486 Value *Cmp = Name.starts_with("u")
2487 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2488 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2489 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2490 } else if (Name == "clz.ll") {
2491 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2492 Value *Arg = CI->getArgOperand(0);
2493 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2494 {Arg, Builder.getFalse()},
2495 /*FMFSource=*/nullptr, "ctlz");
2496 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2497 } else if (Name == "popc.ll") {
2498 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2499 // i64.
2500 Value *Arg = CI->getArgOperand(0);
2501 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2502 Arg, /*FMFSource=*/nullptr, "ctpop");
2503 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2504 } else if (Name == "h2f") {
2505 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2506 {Builder.getFloatTy()}, CI->getArgOperand(0),
2507 /*FMFSource=*/nullptr, "h2f");
2508 } else if (Name.consume_front("bitcast.") &&
2509 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2510 Name == "d2ll")) {
2511 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2512 } else if (Name == "rotate.b32") {
2513 Value *Arg = CI->getOperand(0);
2514 Value *ShiftAmt = CI->getOperand(1);
2515 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2516 {Arg, Arg, ShiftAmt});
2517 } else if (Name == "rotate.b64") {
2518 Type *Int64Ty = Builder.getInt64Ty();
2519 Value *Arg = CI->getOperand(0);
2520 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2521 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2522 {Arg, Arg, ZExtShiftAmt});
2523 } else if (Name == "rotate.right.b64") {
2524 Type *Int64Ty = Builder.getInt64Ty();
2525 Value *Arg = CI->getOperand(0);
2526 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2527 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2528 {Arg, Arg, ZExtShiftAmt});
2529 } else if (Name == "swap.lo.hi.b64") {
2530 Type *Int64Ty = Builder.getInt64Ty();
2531 Value *Arg = CI->getOperand(0);
2532 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2533 {Arg, Arg, Builder.getInt64(32)});
2534 } else if ((Name.consume_front("ptr.gen.to.") &&
2536 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2537 Name.starts_with(".to.gen"))) {
2538 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2539 } else if (Name.consume_front("ldg.global")) {
2540 Value *Ptr = CI->getArgOperand(0);
2541 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2542 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2543 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2544 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2545 MDNode *MD = MDNode::get(Builder.getContext(), {});
2546 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2547 return LD;
2548 } else if (Name == "tanh.approx.f32") {
2549 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2550 FastMathFlags FMF;
2551 FMF.setApproxFunc();
2552 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2553 FMF);
2554 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2555 Value *Arg =
2556 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2557 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2558 {}, {Arg});
2559 } else if (Name == "barrier") {
2560 Rep = Builder.CreateIntrinsic(
2561 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2562 {CI->getArgOperand(0), CI->getArgOperand(1)});
2563 } else if (Name == "barrier.sync") {
2564 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2565 {CI->getArgOperand(0)});
2566 } else if (Name == "barrier.sync.cnt") {
2567 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2568 {CI->getArgOperand(0), CI->getArgOperand(1)});
2569 } else {
2570 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2571 if (IID != Intrinsic::not_intrinsic &&
2572 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2573 rename(F);
2574 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2575 SmallVector<Value *, 2> Args;
2576 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2577 Value *Arg = CI->getArgOperand(I);
2578 Type *OldType = Arg->getType();
2579 Type *NewType = NewFn->getArg(I)->getType();
2580 Args.push_back(
2581 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2582 ? Builder.CreateBitCast(Arg, NewType)
2583 : Arg);
2584 }
2585 Rep = Builder.CreateCall(NewFn, Args);
2586 if (F->getReturnType()->isIntegerTy())
2587 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2588 }
2589 }
2590
2591 return Rep;
2592}
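// For example (illustrative): the "rotate.b32" case above rewrites
//   %r = call i32 @llvm.nvvm.rotate.b32(i32 %x, i32 %n)
// as
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
// expressing the rotate as a funnel shift with both inputs equal.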
2593
2594static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2595 IRBuilder<> &Builder) {
2596 LLVMContext &C = F->getContext();
2597 Value *Rep = nullptr;
2598
2599 if (Name.starts_with("sse4a.movnt.")) {
2601 Elts.push_back(
2602 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2603 MDNode *Node = MDNode::get(C, Elts);
2604
2605 Value *Arg0 = CI->getArgOperand(0);
2606 Value *Arg1 = CI->getArgOperand(1);
2607
2608 // Nontemporal (unaligned) store of the 0'th element of the float/double
2609 // vector.
2610 Value *Extract =
2611 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2612
2613 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2614 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2615 } else if (Name.starts_with("avx.movnt.") ||
2616 Name.starts_with("avx512.storent.")) {
2618 Elts.push_back(
2619 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2620 MDNode *Node = MDNode::get(C, Elts);
2621
2622 Value *Arg0 = CI->getArgOperand(0);
2623 Value *Arg1 = CI->getArgOperand(1);
2624
2625 StoreInst *SI = Builder.CreateAlignedStore(
2626 Arg1, Arg0,
2627 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2628 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2629 } else if (Name == "sse2.storel.dq") {
2630 Value *Arg0 = CI->getArgOperand(0);
2631 Value *Arg1 = CI->getArgOperand(1);
2632
2633 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2634 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2635 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2636 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2637 } else if (Name.starts_with("sse.storeu.") ||
2638 Name.starts_with("sse2.storeu.") ||
2639 Name.starts_with("avx.storeu.")) {
2640 Value *Arg0 = CI->getArgOperand(0);
2641 Value *Arg1 = CI->getArgOperand(1);
2642 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2643 } else if (Name == "avx512.mask.store.ss") {
2644 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2645 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2646 Mask, false);
2647 } else if (Name.starts_with("avx512.mask.store")) {
2648 // "avx512.mask.storeu." or "avx512.mask.store."
2649 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2650 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2651 CI->getArgOperand(2), Aligned);
2652 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2653 // Upgrade packed integer vector compare intrinsics to compare instructions.
2654 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2655 bool CmpEq = Name[9] == 'e';
2656 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2657 CI->getArgOperand(0), CI->getArgOperand(1));
2658 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2659 } else if (Name.starts_with("avx512.broadcastm")) {
2660 Type *ExtTy = Type::getInt32Ty(C);
2661 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2662 ExtTy = Type::getInt64Ty(C);
2663 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2664 ExtTy->getPrimitiveSizeInBits();
2665 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2666 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2667 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2668 Value *Vec = CI->getArgOperand(0);
2669 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2670 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2671 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2672 } else if (Name.starts_with("avx.sqrt.p") ||
2673 Name.starts_with("sse2.sqrt.p") ||
2674 Name.starts_with("sse.sqrt.p")) {
2675 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2676 {CI->getArgOperand(0)});
2677 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2678 if (CI->arg_size() == 4 &&
2679 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2680 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2681 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2682 : Intrinsic::x86_avx512_sqrt_pd_512;
2683
2684 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2685 Rep = Builder.CreateIntrinsic(IID, Args);
2686 } else {
2687 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2688 {CI->getArgOperand(0)});
2689 }
2690 Rep =
2691 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2692 } else if (Name.starts_with("avx512.ptestm") ||
2693 Name.starts_with("avx512.ptestnm")) {
2694 Value *Op0 = CI->getArgOperand(0);
2695 Value *Op1 = CI->getArgOperand(1);
2696 Value *Mask = CI->getArgOperand(2);
2697 Rep = Builder.CreateAnd(Op0, Op1);
2698 llvm::Type *Ty = Op0->getType();
2699 Value *Zero = llvm::Constant::getNullValue(Ty);
2700 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2701 ? ICmpInst::ICMP_NE
2702 : ICmpInst::ICMP_EQ;
2703 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2704 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2705 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2706 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2707 ->getNumElements();
2708 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2709 Rep =
2710 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2711 } else if (Name.starts_with("avx512.kunpck")) {
2712 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2713 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2714 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2715 int Indices[64];
2716 for (unsigned i = 0; i != NumElts; ++i)
2717 Indices[i] = i;
2718
2719 // First extract half of each vector. This gives better codegen than
2720 // doing it in a single shuffle.
2721 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2722 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2723 // Concat the vectors.
2724 // NOTE: Operands have to be swapped to match intrinsic definition.
2725 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2726 Rep = Builder.CreateBitCast(Rep, CI->getType());
2727 } else if (Name == "avx512.kand.w") {
2728 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2729 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2730 Rep = Builder.CreateAnd(LHS, RHS);
2731 Rep = Builder.CreateBitCast(Rep, CI->getType());
2732 } else if (Name == "avx512.kandn.w") {
2733 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2734 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2735 LHS = Builder.CreateNot(LHS);
2736 Rep = Builder.CreateAnd(LHS, RHS);
2737 Rep = Builder.CreateBitCast(Rep, CI->getType());
2738 } else if (Name == "avx512.kor.w") {
2739 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2740 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2741 Rep = Builder.CreateOr(LHS, RHS);
2742 Rep = Builder.CreateBitCast(Rep, CI->getType());
2743 } else if (Name == "avx512.kxor.w") {
2744 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2745 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2746 Rep = Builder.CreateXor(LHS, RHS);
2747 Rep = Builder.CreateBitCast(Rep, CI->getType());
2748 } else if (Name == "avx512.kxnor.w") {
2749 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2750 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2751 LHS = Builder.CreateNot(LHS);
2752 Rep = Builder.CreateXor(LHS, RHS);
2753 Rep = Builder.CreateBitCast(Rep, CI->getType());
2754 } else if (Name == "avx512.knot.w") {
2755 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2756 Rep = Builder.CreateNot(Rep);
2757 Rep = Builder.CreateBitCast(Rep, CI->getType());
2758 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2759 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2760 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2761 Rep = Builder.CreateOr(LHS, RHS);
2762 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2763 Value *C;
2764 if (Name[14] == 'c')
2765 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2766 else
2767 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2768 Rep = Builder.CreateICmpEQ(Rep, C);
2769 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2770 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2771 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2772 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2773 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2774 Type *I32Ty = Type::getInt32Ty(C);
2775 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2776 ConstantInt::get(I32Ty, 0));
2777 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2778 ConstantInt::get(I32Ty, 0));
2779 Value *EltOp;
2780 if (Name.contains(".add."))
2781 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2782 else if (Name.contains(".sub."))
2783 EltOp = Builder.CreateFSub(Elt0, Elt1);
2784 else if (Name.contains(".mul."))
2785 EltOp = Builder.CreateFMul(Elt0, Elt1);
2786 else
2787 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2788 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2789 ConstantInt::get(I32Ty, 0));
2790 } else if (Name.starts_with("avx512.mask.pcmp")) {
2791 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2792 bool CmpEq = Name[16] == 'e';
2793 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2794 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2795 Type *OpTy = CI->getArgOperand(0)->getType();
2796 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2797 Intrinsic::ID IID;
2798 switch (VecWidth) {
2799 default:
2800 llvm_unreachable("Unexpected intrinsic");
2801 case 128:
2802 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2803 break;
2804 case 256:
2805 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2806 break;
2807 case 512:
2808 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2809 break;
2810 }
2811
2812 Rep =
2813 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2814 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2815 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2816 Type *OpTy = CI->getArgOperand(0)->getType();
2817 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2818 unsigned EltWidth = OpTy->getScalarSizeInBits();
2819 Intrinsic::ID IID;
2820 if (VecWidth == 128 && EltWidth == 32)
2821 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2822 else if (VecWidth == 256 && EltWidth == 32)
2823 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2824 else if (VecWidth == 512 && EltWidth == 32)
2825 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2826 else if (VecWidth == 128 && EltWidth == 64)
2827 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2828 else if (VecWidth == 256 && EltWidth == 64)
2829 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2830 else if (VecWidth == 512 && EltWidth == 64)
2831 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2832 else
2833 llvm_unreachable("Unexpected intrinsic");
2834
2835 Rep =
2836 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2837 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2838 } else if (Name.starts_with("avx512.cmp.p")) {
2839 SmallVector<Value *, 4> Args(CI->args());
2840 Type *OpTy = Args[0]->getType();
2841 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2842 unsigned EltWidth = OpTy->getScalarSizeInBits();
2843 Intrinsic::ID IID;
2844 if (VecWidth == 128 && EltWidth == 32)
2845 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2846 else if (VecWidth == 256 && EltWidth == 32)
2847 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2848 else if (VecWidth == 512 && EltWidth == 32)
2849 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2850 else if (VecWidth == 128 && EltWidth == 64)
2851 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2852 else if (VecWidth == 256 && EltWidth == 64)
2853 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2854 else if (VecWidth == 512 && EltWidth == 64)
2855 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2856 else
2857 llvm_unreachable("Unexpected intrinsic");
2858
2860 if (VecWidth == 512)
2861 std::swap(Mask, Args.back());
2862 Args.push_back(Mask);
2863
2864 Rep = Builder.CreateIntrinsic(IID, Args);
2865 } else if (Name.starts_with("avx512.mask.cmp.")) {
2866 // Integer compare intrinsics.
2867 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2868 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2869 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2870 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2871 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2872 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2873 Name.starts_with("avx512.cvtw2mask.") ||
2874 Name.starts_with("avx512.cvtd2mask.") ||
2875 Name.starts_with("avx512.cvtq2mask.")) {
2876 Value *Op = CI->getArgOperand(0);
2877 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2878 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2879 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2880 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2881 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2882 Name.starts_with("avx512.mask.pabs")) {
2883 Rep = upgradeAbs(Builder, *CI);
2884 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2885 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2886 Name.starts_with("avx512.mask.pmaxs")) {
2887 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2888 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2889 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2890 Name.starts_with("avx512.mask.pmaxu")) {
2891 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2892 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2893 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2894 Name.starts_with("avx512.mask.pmins")) {
2895 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2896 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2897 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2898 Name.starts_with("avx512.mask.pminu")) {
2899 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2900 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2901 Name == "avx512.pmulu.dq.512" ||
2902 Name.starts_with("avx512.mask.pmulu.dq.")) {
2903 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2904 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2905 Name == "avx512.pmul.dq.512" ||
2906 Name.starts_with("avx512.mask.pmul.dq.")) {
2907 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2908 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2909 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2910 Rep =
2911 Builder.CreateSIToFP(CI->getArgOperand(1),
2912 cast<VectorType>(CI->getType())->getElementType());
2913 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2914 } else if (Name == "avx512.cvtusi2sd") {
2915 Rep =
2916 Builder.CreateUIToFP(CI->getArgOperand(1),
2917 cast<VectorType>(CI->getType())->getElementType());
2918 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2919 } else if (Name == "sse2.cvtss2sd") {
2920 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2921 Rep = Builder.CreateFPExt(
2922 Rep, cast<VectorType>(CI->getType())->getElementType());
2923 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2924 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2925 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2926 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2927 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2928 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2929 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2930 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2931 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2932 Name == "avx512.mask.cvtqq2ps.256" ||
2933 Name == "avx512.mask.cvtqq2ps.512" ||
2934 Name == "avx512.mask.cvtuqq2ps.256" ||
2935 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2936 Name == "avx.cvt.ps2.pd.256" ||
2937 Name == "avx512.mask.cvtps2pd.128" ||
2938 Name == "avx512.mask.cvtps2pd.256") {
2939 auto *DstTy = cast<FixedVectorType>(CI->getType());
2940 Rep = CI->getArgOperand(0);
2941 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2942
2943 unsigned NumDstElts = DstTy->getNumElements();
2944 if (NumDstElts < SrcTy->getNumElements()) {
2945 assert(NumDstElts == 2 && "Unexpected vector size");
2946 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2947 }
2948
2949 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2950 bool IsUnsigned = Name.contains("cvtu");
2951 if (IsPS2PD)
2952 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2953 else if (CI->arg_size() == 4 &&
2954 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2955 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2956 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2957 : Intrinsic::x86_avx512_sitofp_round;
2958 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2959 {Rep, CI->getArgOperand(3)});
2960 } else {
2961 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2962 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2963 }
2964
2965 if (CI->arg_size() >= 3)
2966 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2967 CI->getArgOperand(1));
2968 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2969 Name.starts_with("vcvtph2ps.")) {
2970 auto *DstTy = cast<FixedVectorType>(CI->getType());
2971 Rep = CI->getArgOperand(0);
2972 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2973 unsigned NumDstElts = DstTy->getNumElements();
2974 if (NumDstElts != SrcTy->getNumElements()) {
2975 assert(NumDstElts == 4 && "Unexpected vector size");
2976 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2977 }
2978 Rep = Builder.CreateBitCast(
2979 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2980 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2981 if (CI->arg_size() >= 3)
2982 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2983 CI->getArgOperand(1));
2984 } else if (Name.starts_with("avx512.mask.load")) {
2985 // "avx512.mask.loadu." or "avx512.mask.load."
2986 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2987 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2988 CI->getArgOperand(2), Aligned);
2989 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2990 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2991 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2992 ResultTy->getNumElements());
2993
2994 Rep = Builder.CreateIntrinsic(
2995 Intrinsic::masked_expandload, ResultTy,
2996 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2997 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2998 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2999 Value *MaskVec =
3000 getX86MaskVec(Builder, CI->getArgOperand(2),
3001 cast<FixedVectorType>(ResultTy)->getNumElements());
3002
3003 Rep = Builder.CreateIntrinsic(
3004 Intrinsic::masked_compressstore, ResultTy,
3005 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3006 } else if (Name.starts_with("avx512.mask.compress.") ||
3007 Name.starts_with("avx512.mask.expand.")) {
3008 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3009
3010 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3011 ResultTy->getNumElements());
3012
3013 bool IsCompress = Name[12] == 'c';
3014 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3015 : Intrinsic::x86_avx512_mask_expand;
3016 Rep = Builder.CreateIntrinsic(
3017 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3018 } else if (Name.starts_with("xop.vpcom")) {
3019 bool IsSigned;
3020 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3021 Name.ends_with("uq"))
3022 IsSigned = false;
3023 else if (Name.ends_with("b") || Name.ends_with("w") ||
3024 Name.ends_with("d") || Name.ends_with("q"))
3025 IsSigned = true;
3026 else
3027 llvm_unreachable("Unknown suffix");
3028
3029 unsigned Imm;
3030 if (CI->arg_size() == 3) {
3031 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3032 } else {
3033 Name = Name.substr(9); // strip off "xop.vpcom"
3034 if (Name.starts_with("lt"))
3035 Imm = 0;
3036 else if (Name.starts_with("le"))
3037 Imm = 1;
3038 else if (Name.starts_with("gt"))
3039 Imm = 2;
3040 else if (Name.starts_with("ge"))
3041 Imm = 3;
3042 else if (Name.starts_with("eq"))
3043 Imm = 4;
3044 else if (Name.starts_with("ne"))
3045 Imm = 5;
3046 else if (Name.starts_with("false"))
3047 Imm = 6;
3048 else if (Name.starts_with("true"))
3049 Imm = 7;
3050 else
3051 llvm_unreachable("Unknown condition");
3052 }
3053
3054 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3055 } else if (Name.starts_with("xop.vpcmov")) {
3056 Value *Sel = CI->getArgOperand(2);
3057 Value *NotSel = Builder.CreateNot(Sel);
3058 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3059 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3060 Rep = Builder.CreateOr(Sel0, Sel1);
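  // xop.vpcmov is a bitwise select: each result bit is
  // (Op0 & Sel) | (Op1 & ~Sel), which the three instructions above express
  // directly in generic IR.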
3061 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3062 Name.starts_with("avx512.mask.prol")) {
3063 Rep = upgradeX86Rotate(Builder, *CI, false);
3064 } else if (Name.starts_with("avx512.pror") ||
3065 Name.starts_with("avx512.mask.pror")) {
3066 Rep = upgradeX86Rotate(Builder, *CI, true);
3067 } else if (Name.starts_with("avx512.vpshld.") ||
3068 Name.starts_with("avx512.mask.vpshld") ||
3069 Name.starts_with("avx512.maskz.vpshld")) {
3070 bool ZeroMask = Name[11] == 'z';
3071 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3072 } else if (Name.starts_with("avx512.vpshrd.") ||
3073 Name.starts_with("avx512.mask.vpshrd") ||
3074 Name.starts_with("avx512.maskz.vpshrd")) {
3075 bool ZeroMask = Name[11] == 'z';
3076 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3077 } else if (Name == "sse42.crc32.64.8") {
3078 Value *Trunc0 =
3079 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3080 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3081 {Trunc0, CI->getArgOperand(1)});
3082 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3083 } else if (Name.starts_with("avx.vbroadcast.s") ||
3084 Name.starts_with("avx512.vbroadcast.s")) {
3085 // Replace broadcasts with a series of insertelements.
3086 auto *VecTy = cast<FixedVectorType>(CI->getType());
3087 Type *EltTy = VecTy->getElementType();
3088 unsigned EltNum = VecTy->getNumElements();
3089 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3090 Type *I32Ty = Type::getInt32Ty(C);
3091 Rep = PoisonValue::get(VecTy);
3092 for (unsigned I = 0; I < EltNum; ++I)
3093 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
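  // Illustrative result for a <4 x float> broadcast (values hypothetical):
  //   %ld = load float, ptr %p
  //   %v0 = insertelement <4 x float> poison, float %ld, i32 0
  //   ... three more insertelements for lanes 1-3 ...
  // Later passes typically refold this into a single broadcast.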
3094 } else if (Name.starts_with("sse41.pmovsx") ||
3095 Name.starts_with("sse41.pmovzx") ||
3096 Name.starts_with("avx2.pmovsx") ||
3097 Name.starts_with("avx2.pmovzx") ||
3098 Name.starts_with("avx512.mask.pmovsx") ||
3099 Name.starts_with("avx512.mask.pmovzx")) {
3100 auto *DstTy = cast<FixedVectorType>(CI->getType());
3101 unsigned NumDstElts = DstTy->getNumElements();
3102
3103 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3104 SmallVector<int, 8> ShuffleMask(NumDstElts);
3105 for (unsigned i = 0; i != NumDstElts; ++i)
3106 ShuffleMask[i] = i;
3107
3108 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3109
3110 bool DoSext = Name.contains("pmovsx");
3111 Rep =
3112 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3113 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3114 if (CI->arg_size() == 3)
3115 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3116 CI->getArgOperand(1));
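  // Sketch (illustrative): sse41.pmovsxbw on a <16 x i8> source shuffles
  // out the low 8 lanes and sign-extends them:
  //   %lo = shufflevector <16 x i8> %x, <16 x i8> poison,
  //                       <8 x i32> <i32 0, i32 1, ..., i32 7>
  //   %r  = sext <8 x i8> %lo to <8 x i16>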
3117 } else if (Name == "avx512.mask.pmov.qd.256" ||
3118 Name == "avx512.mask.pmov.qd.512" ||
3119 Name == "avx512.mask.pmov.wb.256" ||
3120 Name == "avx512.mask.pmov.wb.512") {
3121 Type *Ty = CI->getArgOperand(1)->getType();
3122 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3123 Rep =
3124 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3125 } else if (Name.starts_with("avx.vbroadcastf128") ||
3126 Name == "avx2.vbroadcasti128") {
3127 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3128 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3129 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3130 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3131 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3132 if (NumSrcElts == 2)
3133 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3134 else
3135 Rep = Builder.CreateShuffleVector(Load,
3136 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
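  // The 128-bit source is loaded with align 1 (the old intrinsics made no
  // alignment promise) and the shuffle duplicates it into both halves,
  // e.g. <4 x float> -> <8 x float> via mask <0,1,2,3,0,1,2,3>.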
3137 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3138 Name.starts_with("avx512.mask.shuf.f")) {
3139 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3140 Type *VT = CI->getType();
3141 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3142 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3143 unsigned ControlBitsMask = NumLanes - 1;
3144 unsigned NumControlBits = NumLanes / 2;
3145 SmallVector<int, 8> ShuffleMask(0);
3146
3147 for (unsigned l = 0; l != NumLanes; ++l) {
3148 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3149 // We actually need the other source.
3150 if (l >= NumLanes / 2)
3151 LaneMask += NumLanes;
3152 for (unsigned i = 0; i != NumElementsInLane; ++i)
3153 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3154 }
3155 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3156 CI->getArgOperand(1), ShuffleMask);
3157 Rep =
3158 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3159 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3160 Name.starts_with("avx512.mask.broadcasti")) {
3161 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3162 ->getNumElements();
3163 unsigned NumDstElts =
3164 cast<FixedVectorType>(CI->getType())->getNumElements();
3165
3166 SmallVector<int, 8> ShuffleMask(NumDstElts);
3167 for (unsigned i = 0; i != NumDstElts; ++i)
3168 ShuffleMask[i] = i % NumSrcElts;
3169
3170 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3171 CI->getArgOperand(0), ShuffleMask);
3172 Rep =
3173 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3174 } else if (Name.starts_with("avx2.pbroadcast") ||
3175 Name.starts_with("avx2.vbroadcast") ||
3176 Name.starts_with("avx512.pbroadcast") ||
3177 Name.starts_with("avx512.mask.broadcast.s")) {
3178 // Replace vp?broadcasts with a vector shuffle.
3179 Value *Op = CI->getArgOperand(0);
3180 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3181 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3182 SmallVector<int, 8> M;
3183 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3184 Rep = Builder.CreateShuffleVector(Op, M);
3185
3186 if (CI->arg_size() == 3)
3187 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3188 CI->getArgOperand(1));
3189 } else if (Name.starts_with("sse2.padds.") ||
3190 Name.starts_with("avx2.padds.") ||
3191 Name.starts_with("avx512.padds.") ||
3192 Name.starts_with("avx512.mask.padds.")) {
3193 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3194 } else if (Name.starts_with("sse2.psubs.") ||
3195 Name.starts_with("avx2.psubs.") ||
3196 Name.starts_with("avx512.psubs.") ||
3197 Name.starts_with("avx512.mask.psubs.")) {
3198 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3199 } else if (Name.starts_with("sse2.paddus.") ||
3200 Name.starts_with("avx2.paddus.") ||
3201 Name.starts_with("avx512.mask.paddus.")) {
3202 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3203 } else if (Name.starts_with("sse2.psubus.") ||
3204 Name.starts_with("avx2.psubus.") ||
3205 Name.starts_with("avx512.mask.psubus.")) {
3206 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3207 } else if (Name.starts_with("avx512.mask.palignr.")) {
3208 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3209 CI->getArgOperand(1), CI->getArgOperand(2),
3210 CI->getArgOperand(3), CI->getArgOperand(4),
3211 false);
3212 } else if (Name.starts_with("avx512.mask.valign.")) {
3213 Rep = upgradeX86ALIGNIntrinsics(
3214 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3215 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3216 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3217 // 128/256-bit shift left specified in bits.
3218 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3219 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3220 Shift / 8); // Shift is in bits.
3221 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3222 // 128/256-bit shift right specified in bits.
3223 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3224 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3225 Shift / 8); // Shift is in bits.
3226 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3227 Name == "avx512.psll.dq.512") {
3228 // 128/256/512-bit shift left specified in bytes.
3229 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3230 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3231 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3232 Name == "avx512.psrl.dq.512") {
3233 // 128/256/512-bit shift right specified in bytes.
3234 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3235 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3236 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3237 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3238 Name.starts_with("avx2.pblendd.")) {
3239 Value *Op0 = CI->getArgOperand(0);
3240 Value *Op1 = CI->getArgOperand(1);
3241 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3242 auto *VecTy = cast<FixedVectorType>(CI->getType());
3243 unsigned NumElts = VecTy->getNumElements();
3244
3245 SmallVector<int, 16> Idxs(NumElts);
3246 for (unsigned i = 0; i != NumElts; ++i)
3247 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3248
3249 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
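  // Worked example (illustrative): sse41.pblendw with Imm = 0x0F on
  // <8 x i16> produces the mask <8,9,10,11,4,5,6,7>: lanes whose immediate
  // bit is set read from Op1 (indices offset by NumElts), the rest from Op0.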
3250 } else if (Name.starts_with("avx.vinsertf128.") ||
3251 Name == "avx2.vinserti128" ||
3252 Name.starts_with("avx512.mask.insert")) {
3253 Value *Op0 = CI->getArgOperand(0);
3254 Value *Op1 = CI->getArgOperand(1);
3255 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3256 unsigned DstNumElts =
3257 cast<FixedVectorType>(CI->getType())->getNumElements();
3258 unsigned SrcNumElts =
3259 cast<FixedVectorType>(Op1->getType())->getNumElements();
3260 unsigned Scale = DstNumElts / SrcNumElts;
3261
3262 // Mask off the high bits of the immediate value; hardware ignores those.
3263 Imm = Imm % Scale;
3264
3265 // Extend the second operand into a vector the size of the destination.
3266 SmallVector<int, 8> Idxs(DstNumElts);
3267 for (unsigned i = 0; i != SrcNumElts; ++i)
3268 Idxs[i] = i;
3269 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3270 Idxs[i] = SrcNumElts;
3271 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3272
3273 // Insert the second operand into the first operand.
3274
3275 // Note that there is no guarantee that instruction lowering will actually
3276 // produce a vinsertf128 instruction for the created shuffles. In
3277 // particular, the 0 immediate case involves no lane changes, so it can
3278 // be handled as a blend.
3279
3280 // Example of shuffle mask for 32-bit elements:
3281 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3282 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3283
3284 // First fill with the identity mask.
3285 for (unsigned i = 0; i != DstNumElts; ++i)
3286 Idxs[i] = i;
3287 // Then replace the elements where we need to insert.
3288 for (unsigned i = 0; i != SrcNumElts; ++i)
3289 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3290 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3291
3292 // If the intrinsic has a mask operand, handle that.
3293 if (CI->arg_size() == 5)
3294 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3295 CI->getArgOperand(3));
3296 } else if (Name.starts_with("avx.vextractf128.") ||
3297 Name == "avx2.vextracti128" ||
3298 Name.starts_with("avx512.mask.vextract")) {
3299 Value *Op0 = CI->getArgOperand(0);
3300 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3301 unsigned DstNumElts =
3302 cast<FixedVectorType>(CI->getType())->getNumElements();
3303 unsigned SrcNumElts =
3304 cast<FixedVectorType>(Op0->getType())->getNumElements();
3305 unsigned Scale = SrcNumElts / DstNumElts;
3306
3307 // Mask off the high bits of the immediate value; hardware ignores those.
3308 Imm = Imm % Scale;
3309
3310 // Get indexes for the subvector of the input vector.
3311 SmallVector<int, 8> Idxs(DstNumElts);
3312 for (unsigned i = 0; i != DstNumElts; ++i) {
3313 Idxs[i] = i + (Imm * DstNumElts);
3314 }
3315 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3316
3317 // If the intrinsic has a mask operand, handle that.
3318 if (CI->arg_size() == 4)
3319 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3320 CI->getArgOperand(2));
3321 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3322 Name.starts_with("avx512.mask.perm.di.")) {
3323 Value *Op0 = CI->getArgOperand(0);
3324 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3325 auto *VecTy = cast<FixedVectorType>(CI->getType());
3326 unsigned NumElts = VecTy->getNumElements();
3327
3328 SmallVector<int, 8> Idxs(NumElts);
3329 for (unsigned i = 0; i != NumElts; ++i)
3330 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3331
3332 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3333
3334 if (CI->arg_size() == 4)
3335 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3336 CI->getArgOperand(2));
3337 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3338 // The immediate permute control byte looks like this:
3339 // [1:0] - select 128 bits from sources for low half of destination
3340 // [2] - ignore
3341 // [3] - zero low half of destination
3342 // [5:4] - select 128 bits from sources for high half of destination
3343 // [6] - ignore
3344 // [7] - zero high half of destination
3345
3346 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3347
3348 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3349 unsigned HalfSize = NumElts / 2;
3350 SmallVector<int, 8> ShuffleMask(NumElts);
3351
3352 // Determine which operand(s) are actually in use for this instruction.
3353 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3354 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3355
3356 // If needed, replace operands based on zero mask.
3357 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3358 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3359
3360 // Permute low half of result.
3361 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3362 for (unsigned i = 0; i < HalfSize; ++i)
3363 ShuffleMask[i] = StartIndex + i;
3364
3365 // Permute high half of result.
3366 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3367 for (unsigned i = 0; i < HalfSize; ++i)
3368 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3369
3370 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
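  // Worked example (illustrative): avx.vperm2f128 with Imm = 0x21 on
  // <8 x float> sources selects the high 128 bits of the first operand and
  // the low 128 bits of the second, i.e. mask <4,5,6,7,8,9,10,11>.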
3371
3372 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3373 Name.starts_with("avx512.mask.vpermil.p") ||
3374 Name.starts_with("avx512.mask.pshuf.d.")) {
3375 Value *Op0 = CI->getArgOperand(0);
3376 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3377 auto *VecTy = cast<FixedVectorType>(CI->getType());
3378 unsigned NumElts = VecTy->getNumElements();
3379 // Calculate the size of each index in the immediate.
3380 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3381 unsigned IdxMask = ((1 << IdxSize) - 1);
3382
3383 SmallVector<int, 8> Idxs(NumElts);
3384 // Look up the bits for this element, wrapping around the immediate every
3385 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3386 // to offset by the first index of each group.
3387 for (unsigned i = 0; i != NumElts; ++i)
3388 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3389
3390 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3391
3392 if (CI->arg_size() == 4)
3393 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3394 CI->getArgOperand(2));
3395 } else if (Name == "sse2.pshufl.w" ||
3396 Name.starts_with("avx512.mask.pshufl.w.")) {
3397 Value *Op0 = CI->getArgOperand(0);
3398 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3399 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3400
3401 SmallVector<int, 16> Idxs(NumElts);
3402 for (unsigned l = 0; l != NumElts; l += 8) {
3403 for (unsigned i = 0; i != 4; ++i)
3404 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3405 for (unsigned i = 4; i != 8; ++i)
3406 Idxs[i + l] = i + l;
3407 }
3408
3409 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3410
3411 if (CI->arg_size() == 4)
3412 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3413 CI->getArgOperand(2));
3414 } else if (Name == "sse2.pshufh.w" ||
3415 Name.starts_with("avx512.mask.pshufh.w.")) {
3416 Value *Op0 = CI->getArgOperand(0);
3417 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3418 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3419
3420 SmallVector<int, 16> Idxs(NumElts);
3421 for (unsigned l = 0; l != NumElts; l += 8) {
3422 for (unsigned i = 0; i != 4; ++i)
3423 Idxs[i + l] = i + l;
3424 for (unsigned i = 0; i != 4; ++i)
3425 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3426 }
3427
3428 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3429
3430 if (CI->arg_size() == 4)
3431 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3432 CI->getArgOperand(2));
3433 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3434 Value *Op0 = CI->getArgOperand(0);
3435 Value *Op1 = CI->getArgOperand(1);
3436 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3437 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3438
3439 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3440 unsigned HalfLaneElts = NumLaneElts / 2;
3441
3442 SmallVector<int, 16> Idxs(NumElts);
3443 for (unsigned i = 0; i != NumElts; ++i) {
3444 // Base index is the starting element of the lane.
3445 Idxs[i] = i - (i % NumLaneElts);
3446 // If we are halfway through the lane, switch to the other source.
3447 if ((i % NumLaneElts) >= HalfLaneElts)
3448 Idxs[i] += NumElts;
3449 // Now select the specific element by adding HalfLaneElts bits from
3450 // the immediate, wrapping around the immediate every 8 bits.
3451 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3452 }
3453
3454 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3455
3456 Rep =
3457 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3458 } else if (Name.starts_with("avx512.mask.movddup") ||
3459 Name.starts_with("avx512.mask.movshdup") ||
3460 Name.starts_with("avx512.mask.movsldup")) {
3461 Value *Op0 = CI->getArgOperand(0);
3462 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3463 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3464
3465 unsigned Offset = 0;
3466 if (Name.starts_with("avx512.mask.movshdup."))
3467 Offset = 1;
3468
3469 SmallVector<int, 16> Idxs(NumElts);
3470 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3471 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3472 Idxs[i + l + 0] = i + l + Offset;
3473 Idxs[i + l + 1] = i + l + Offset;
3474 }
3475
3476 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3477
3478 Rep =
3479 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3480 } else if (Name.starts_with("avx512.mask.punpckl") ||
3481 Name.starts_with("avx512.mask.unpckl.")) {
3482 Value *Op0 = CI->getArgOperand(0);
3483 Value *Op1 = CI->getArgOperand(1);
3484 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3485 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3486
3487 SmallVector<int, 64> Idxs(NumElts);
3488 for (int l = 0; l != NumElts; l += NumLaneElts)
3489 for (int i = 0; i != NumLaneElts; ++i)
3490 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3491
3492 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3493
3494 Rep =
3495 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3496 } else if (Name.starts_with("avx512.mask.punpckh") ||
3497 Name.starts_with("avx512.mask.unpckh.")) {
3498 Value *Op0 = CI->getArgOperand(0);
3499 Value *Op1 = CI->getArgOperand(1);
3500 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3501 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3502
3503 SmallVector<int, 64> Idxs(NumElts);
3504 for (int l = 0; l != NumElts; l += NumLaneElts)
3505 for (int i = 0; i != NumLaneElts; ++i)
3506 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3507
3508 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3509
3510 Rep =
3511 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3512 } else if (Name.starts_with("avx512.mask.and.") ||
3513 Name.starts_with("avx512.mask.pand.")) {
3514 VectorType *FTy = cast<VectorType>(CI->getType());
3515 VectorType *ITy = VectorType::getInteger(FTy);
3516 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3517 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3518 Rep = Builder.CreateBitCast(Rep, FTy);
3519 Rep =
3520 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3521 } else if (Name.starts_with("avx512.mask.andn.") ||
3522 Name.starts_with("avx512.mask.pandn.")) {
3523 VectorType *FTy = cast<VectorType>(CI->getType());
3524 VectorType *ITy = VectorType::getInteger(FTy);
3525 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3526 Rep = Builder.CreateAnd(Rep,
3527 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3528 Rep = Builder.CreateBitCast(Rep, FTy);
3529 Rep =
3530 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3531 } else if (Name.starts_with("avx512.mask.or.") ||
3532 Name.starts_with("avx512.mask.por.")) {
3533 VectorType *FTy = cast<VectorType>(CI->getType());
3534 VectorType *ITy = VectorType::getInteger(FTy);
3535 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3536 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3537 Rep = Builder.CreateBitCast(Rep, FTy);
3538 Rep =
3539 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3540 } else if (Name.starts_with("avx512.mask.xor.") ||
3541 Name.starts_with("avx512.mask.pxor.")) {
3542 VectorType *FTy = cast<VectorType>(CI->getType());
3543 VectorType *ITy = VectorType::getInteger(FTy);
3544 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3545 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3546 Rep = Builder.CreateBitCast(Rep, FTy);
3547 Rep =
3548 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3549 } else if (Name.starts_with("avx512.mask.padd.")) {
3550 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3551 Rep =
3552 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3553 } else if (Name.starts_with("avx512.mask.psub.")) {
3554 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3555 Rep =
3556 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3557 } else if (Name.starts_with("avx512.mask.pmull.")) {
3558 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3559 Rep =
3560 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3561 } else if (Name.starts_with("avx512.mask.add.p")) {
3562 if (Name.ends_with(".512")) {
3563 Intrinsic::ID IID;
3564 if (Name[17] == 's')
3565 IID = Intrinsic::x86_avx512_add_ps_512;
3566 else
3567 IID = Intrinsic::x86_avx512_add_pd_512;
3568
3569 Rep = Builder.CreateIntrinsic(
3570 IID,
3571 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3572 } else {
3573 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3574 }
3575 Rep =
3576 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3577 } else if (Name.starts_with("avx512.mask.div.p")) {
3578 if (Name.ends_with(".512")) {
3579 Intrinsic::ID IID;
3580 if (Name[17] == 's')
3581 IID = Intrinsic::x86_avx512_div_ps_512;
3582 else
3583 IID = Intrinsic::x86_avx512_div_pd_512;
3584
3585 Rep = Builder.CreateIntrinsic(
3586 IID,
3587 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3588 } else {
3589 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3590 }
3591 Rep =
3592 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3593 } else if (Name.starts_with("avx512.mask.mul.p")) {
3594 if (Name.ends_with(".512")) {
3595 Intrinsic::ID IID;
3596 if (Name[17] == 's')
3597 IID = Intrinsic::x86_avx512_mul_ps_512;
3598 else
3599 IID = Intrinsic::x86_avx512_mul_pd_512;
3600
3601 Rep = Builder.CreateIntrinsic(
3602 IID,
3603 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3604 } else {
3605 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3606 }
3607 Rep =
3608 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3609 } else if (Name.starts_with("avx512.mask.sub.p")) {
3610 if (Name.ends_with(".512")) {
3611 Intrinsic::ID IID;
3612 if (Name[17] == 's')
3613 IID = Intrinsic::x86_avx512_sub_ps_512;
3614 else
3615 IID = Intrinsic::x86_avx512_sub_pd_512;
3616
3617 Rep = Builder.CreateIntrinsic(
3618 IID,
3619 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3620 } else {
3621 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3622 }
3623 Rep =
3624 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3625 } else if ((Name.starts_with("avx512.mask.max.p") ||
3626 Name.starts_with("avx512.mask.min.p")) &&
3627 Name.drop_front(18) == ".512") {
3628 bool IsDouble = Name[17] == 'd';
3629 bool IsMin = Name[13] == 'i';
3630 static const Intrinsic::ID MinMaxTbl[2][2] = {
3631 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3632 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3633 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3634
3635 Rep = Builder.CreateIntrinsic(
3636 IID,
3637 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3638 Rep =
3639 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3640 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3641 Rep =
3642 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3643 {CI->getArgOperand(0), Builder.getInt1(false)});
3644 Rep =
3645 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
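  // Sketch (illustrative): avx512.mask.lzcnt.d.128 becomes
  //   %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
  // where the i1 false flag requests defined behavior for zero inputs,
  // followed by the usual masked select.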
3646 } else if (Name.starts_with("avx512.mask.psll")) {
3647 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3648 bool IsVariable = Name[16] == 'v';
3649 char Size = Name[16] == '.' ? Name[17]
3650 : Name[17] == '.' ? Name[18]
3651 : Name[18] == '.' ? Name[19]
3652 : Name[20];
3653
3654 Intrinsic::ID IID;
3655 if (IsVariable && Name[17] != '.') {
3656 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3657 IID = Intrinsic::x86_avx2_psllv_q;
3658 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3659 IID = Intrinsic::x86_avx2_psllv_q_256;
3660 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3661 IID = Intrinsic::x86_avx2_psllv_d;
3662 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3663 IID = Intrinsic::x86_avx2_psllv_d_256;
3664 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3665 IID = Intrinsic::x86_avx512_psllv_w_128;
3666 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3667 IID = Intrinsic::x86_avx512_psllv_w_256;
3668 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3669 IID = Intrinsic::x86_avx512_psllv_w_512;
3670 else
3671 llvm_unreachable("Unexpected size");
3672 } else if (Name.ends_with(".128")) {
3673 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3674 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3675 : Intrinsic::x86_sse2_psll_d;
3676 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3677 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3678 : Intrinsic::x86_sse2_psll_q;
3679 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3680 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3681 : Intrinsic::x86_sse2_psll_w;
3682 else
3683 llvm_unreachable("Unexpected size");
3684 } else if (Name.ends_with(".256")) {
3685 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3686 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3687 : Intrinsic::x86_avx2_psll_d;
3688 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3689 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3690 : Intrinsic::x86_avx2_psll_q;
3691 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3692 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3693 : Intrinsic::x86_avx2_psll_w;
3694 else
3695 llvm_unreachable("Unexpected size");
3696 } else {
3697 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3698 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3699 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3700 : Intrinsic::x86_avx512_psll_d_512;
3701 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3702 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3703 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3704 : Intrinsic::x86_avx512_psll_q_512;
3705 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3706 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3707 : Intrinsic::x86_avx512_psll_w_512;
3708 else
3709 llvm_unreachable("Unexpected size");
3710 }
3711
3712 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3713 } else if (Name.starts_with("avx512.mask.psrl")) {
3714 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3715 bool IsVariable = Name[16] == 'v';
3716 char Size = Name[16] == '.' ? Name[17]
3717 : Name[17] == '.' ? Name[18]
3718 : Name[18] == '.' ? Name[19]
3719 : Name[20];
3720
3721 Intrinsic::ID IID;
3722 if (IsVariable && Name[17] != '.') {
3723 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3724 IID = Intrinsic::x86_avx2_psrlv_q;
3725 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3726 IID = Intrinsic::x86_avx2_psrlv_q_256;
3727 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3728 IID = Intrinsic::x86_avx2_psrlv_d;
3729 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3730 IID = Intrinsic::x86_avx2_psrlv_d_256;
3731 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3732 IID = Intrinsic::x86_avx512_psrlv_w_128;
3733 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3734 IID = Intrinsic::x86_avx512_psrlv_w_256;
3735 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3736 IID = Intrinsic::x86_avx512_psrlv_w_512;
3737 else
3738 llvm_unreachable("Unexpected size");
3739 } else if (Name.ends_with(".128")) {
3740 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3741 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3742 : Intrinsic::x86_sse2_psrl_d;
3743 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3744 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3745 : Intrinsic::x86_sse2_psrl_q;
3746 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3747 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3748 : Intrinsic::x86_sse2_psrl_w;
3749 else
3750 llvm_unreachable("Unexpected size");
3751 } else if (Name.ends_with(".256")) {
3752 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3753 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3754 : Intrinsic::x86_avx2_psrl_d;
3755 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3756 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3757 : Intrinsic::x86_avx2_psrl_q;
3758 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3759 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3760 : Intrinsic::x86_avx2_psrl_w;
3761 else
3762 llvm_unreachable("Unexpected size");
3763 } else {
3764 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3765 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3766 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3767 : Intrinsic::x86_avx512_psrl_d_512;
3768 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3769 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3770 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3771 : Intrinsic::x86_avx512_psrl_q_512;
3772 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3773 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3774 : Intrinsic::x86_avx512_psrl_w_512;
3775 else
3776 llvm_unreachable("Unexpected size");
3777 }
3778
3779 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3780 } else if (Name.starts_with("avx512.mask.psra")) {
3781 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3782 bool IsVariable = Name[16] == 'v';
3783 char Size = Name[16] == '.' ? Name[17]
3784 : Name[17] == '.' ? Name[18]
3785 : Name[18] == '.' ? Name[19]
3786 : Name[20];
3787
3788 Intrinsic::ID IID;
3789 if (IsVariable && Name[17] != '.') {
3790 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3791 IID = Intrinsic::x86_avx2_psrav_d;
3792 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3793 IID = Intrinsic::x86_avx2_psrav_d_256;
3794 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3795 IID = Intrinsic::x86_avx512_psrav_w_128;
3796 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3797 IID = Intrinsic::x86_avx512_psrav_w_256;
3798 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3799 IID = Intrinsic::x86_avx512_psrav_w_512;
3800 else
3801 llvm_unreachable("Unexpected size");
3802 } else if (Name.ends_with(".128")) {
3803 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3804 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3805 : Intrinsic::x86_sse2_psra_d;
3806 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3807 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3808 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3809 : Intrinsic::x86_avx512_psra_q_128;
3810 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3811 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3812 : Intrinsic::x86_sse2_psra_w;
3813 else
3814 llvm_unreachable("Unexpected size");
3815 } else if (Name.ends_with(".256")) {
3816 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3817 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3818 : Intrinsic::x86_avx2_psra_d;
3819 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3820 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3821 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3822 : Intrinsic::x86_avx512_psra_q_256;
3823 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3824 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3825 : Intrinsic::x86_avx2_psra_w;
3826 else
3827 llvm_unreachable("Unexpected size");
3828 } else {
3829 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3830 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3831 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3832 : Intrinsic::x86_avx512_psra_d_512;
3833 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3834 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3835 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3836 : Intrinsic::x86_avx512_psra_q_512;
3837 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3838 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3839 : Intrinsic::x86_avx512_psra_w_512;
3840 else
3841 llvm_unreachable("Unexpected size");
3842 }
3843
3844 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3845 } else if (Name.starts_with("avx512.mask.move.s")) {
3846 Rep = upgradeMaskedMove(Builder, *CI);
3847 } else if (Name.starts_with("avx512.cvtmask2")) {
3848 Rep = upgradeMaskToInt(Builder, *CI);
3849 } else if (Name.ends_with(".movntdqa")) {
3850 MDNode *Node = MDNode::get(
3851 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3852
3853 LoadInst *LI = Builder.CreateAlignedLoad(
3854 CI->getType(), CI->getArgOperand(0),
3855 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3856 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3857 Rep = LI;
3858 } else if (Name.starts_with("fma.vfmadd.") ||
3859 Name.starts_with("fma.vfmsub.") ||
3860 Name.starts_with("fma.vfnmadd.") ||
3861 Name.starts_with("fma.vfnmsub.")) {
3862 bool NegMul = Name[6] == 'n';
3863 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3864 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3865
3866 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3867 CI->getArgOperand(2)};
3868
3869 if (IsScalar) {
3870 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3871 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3872 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3873 }
3874
3875 if (NegMul && !IsScalar)
3876 Ops[0] = Builder.CreateFNeg(Ops[0]);
3877 if (NegMul && IsScalar)
3878 Ops[1] = Builder.CreateFNeg(Ops[1]);
3879 if (NegAcc)
3880 Ops[2] = Builder.CreateFNeg(Ops[2]);
3881
3882 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3883
3884 if (IsScalar)
3885 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
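  // For the scalar forms the fma is computed on element 0 only and the
  // result is reinserted into the first source operand, matching the SSE
  // scalar convention that the upper lanes pass through unchanged.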
3886 } else if (Name.starts_with("fma4.vfmadd.s")) {
3887 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3888 CI->getArgOperand(2)};
3889
3890 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3891 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3892 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3893
3894 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3895
3896 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3897 Rep, (uint64_t)0);
3898 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3899 Name.starts_with("avx512.maskz.vfmadd.s") ||
3900 Name.starts_with("avx512.mask3.vfmadd.s") ||
3901 Name.starts_with("avx512.mask3.vfmsub.s") ||
3902 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3903 bool IsMask3 = Name[11] == '3';
3904 bool IsMaskZ = Name[11] == 'z';
3905 // Drop the "avx512.mask." prefix to make the character checks below easier.
3906 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3907 bool NegMul = Name[2] == 'n';
3908 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3909
3910 Value *A = CI->getArgOperand(0);
3911 Value *B = CI->getArgOperand(1);
3912 Value *C = CI->getArgOperand(2);
3913
3914 if (NegMul && (IsMask3 || IsMaskZ))
3915 A = Builder.CreateFNeg(A);
3916 if (NegMul && !(IsMask3 || IsMaskZ))
3917 B = Builder.CreateFNeg(B);
3918 if (NegAcc)
3919 C = Builder.CreateFNeg(C);
3920
3921 A = Builder.CreateExtractElement(A, (uint64_t)0);
3922 B = Builder.CreateExtractElement(B, (uint64_t)0);
3923 C = Builder.CreateExtractElement(C, (uint64_t)0);
3924
3925 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3926 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3927 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3928
3929 Intrinsic::ID IID;
3930 if (Name.back() == 'd')
3931 IID = Intrinsic::x86_avx512_vfmadd_f64;
3932 else
3933 IID = Intrinsic::x86_avx512_vfmadd_f32;
3934 Rep = Builder.CreateIntrinsic(IID, Ops);
3935 } else {
3936 Rep = Builder.CreateFMA(A, B, C);
3937 }
3938
3939 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3940 : IsMask3 ? C
3941 : A;
3942
3943 // For Mask3 with NegAcc, we need to create a new extractelement that
3944 // avoids the negation above.
3945 if (NegAcc && IsMask3)
3946 PassThru =
3947 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3948
3949 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3950 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3951 (uint64_t)0);
3952 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3953 Name.starts_with("avx512.mask.vfnmadd.p") ||
3954 Name.starts_with("avx512.mask.vfnmsub.p") ||
3955 Name.starts_with("avx512.mask3.vfmadd.p") ||
3956 Name.starts_with("avx512.mask3.vfmsub.p") ||
3957 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3958 Name.starts_with("avx512.maskz.vfmadd.p")) {
3959 bool IsMask3 = Name[11] == '3';
3960 bool IsMaskZ = Name[11] == 'z';
3961 // Drop the "avx512.mask." prefix to make the character checks below easier.
3962 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3963 bool NegMul = Name[2] == 'n';
3964 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3965
3966 Value *A = CI->getArgOperand(0);
3967 Value *B = CI->getArgOperand(1);
3968 Value *C = CI->getArgOperand(2);
3969
3970 if (NegMul && (IsMask3 || IsMaskZ))
3971 A = Builder.CreateFNeg(A);
3972 if (NegMul && !(IsMask3 || IsMaskZ))
3973 B = Builder.CreateFNeg(B);
3974 if (NegAcc)
3975 C = Builder.CreateFNeg(C);
3976
3977 if (CI->arg_size() == 5 &&
3978 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3979 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3980 Intrinsic::ID IID;
3981 // Check the character before ".512" in the string.
3982 if (Name[Name.size() - 5] == 's')
3983 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3984 else
3985 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3986
3987 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
3988 } else {
3989 Rep = Builder.CreateFMA(A, B, C);
3990 }
3991
3992 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3993 : IsMask3 ? CI->getArgOperand(2)
3994 : CI->getArgOperand(0);
3995
3996 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3997 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3998 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3999 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4000 Intrinsic::ID IID;
4001 if (VecWidth == 128 && EltWidth == 32)
4002 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4003 else if (VecWidth == 256 && EltWidth == 32)
4004 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4005 else if (VecWidth == 128 && EltWidth == 64)
4006 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4007 else if (VecWidth == 256 && EltWidth == 64)
4008 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4009 else
4010 llvm_unreachable("Unexpected intrinsic");
4011
4012 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4013 CI->getArgOperand(2)};
4014 Ops[2] = Builder.CreateFNeg(Ops[2]);
4015 Rep = Builder.CreateIntrinsic(IID, Ops);
4016 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4017 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4018 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4019 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4020 bool IsMask3 = Name[11] == '3';
4021 bool IsMaskZ = Name[11] == 'z';
4022 // Drop the "avx512.mask." prefix to make the character checks below easier.
4023 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4024 bool IsSubAdd = Name[3] == 's';
4025 if (CI->arg_size() == 5) {
4026 Intrinsic::ID IID;
4027 // Check the character before ".512" in the string.
4028 if (Name[Name.size() - 5] == 's')
4029 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4030 else
4031 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4032
4033 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4034 CI->getArgOperand(2), CI->getArgOperand(4)};
4035 if (IsSubAdd)
4036 Ops[2] = Builder.CreateFNeg(Ops[2]);
4037
4038 Rep = Builder.CreateIntrinsic(IID, Ops);
4039 } else {
4040 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4041
4042 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4043 CI->getArgOperand(2)};
4044
4045 Function *FMA = Intrinsic::getOrInsertDeclaration(
4046 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4047 Value *Odd = Builder.CreateCall(FMA, Ops);
4048 Ops[2] = Builder.CreateFNeg(Ops[2]);
4049 Value *Even = Builder.CreateCall(FMA, Ops);
4050
4051 if (IsSubAdd)
4052 std::swap(Even, Odd);
4053
4054 SmallVector<int, 32> Idxs(NumElts);
4055 for (int i = 0; i != NumElts; ++i)
4056 Idxs[i] = i + (i % 2) * NumElts;
4057
4058 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
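  // The interleave above merges the two fma results lane by lane: with
  // Idxs[i] = i + (i % 2) * NumElts, even lanes take Even (a*b - c) and
  // odd lanes take Odd (a*b + c), reproducing fmaddsub semantics without
  // a target-specific intrinsic.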
4059 }
4060
4061 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4062 : IsMask3 ? CI->getArgOperand(2)
4063 : CI->getArgOperand(0);
4064
4065 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4066 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4067 Name.starts_with("avx512.maskz.pternlog.")) {
4068 bool ZeroMask = Name[11] == 'z';
4069 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4070 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4071 Intrinsic::ID IID;
4072 if (VecWidth == 128 && EltWidth == 32)
4073 IID = Intrinsic::x86_avx512_pternlog_d_128;
4074 else if (VecWidth == 256 && EltWidth == 32)
4075 IID = Intrinsic::x86_avx512_pternlog_d_256;
4076 else if (VecWidth == 512 && EltWidth == 32)
4077 IID = Intrinsic::x86_avx512_pternlog_d_512;
4078 else if (VecWidth == 128 && EltWidth == 64)
4079 IID = Intrinsic::x86_avx512_pternlog_q_128;
4080 else if (VecWidth == 256 && EltWidth == 64)
4081 IID = Intrinsic::x86_avx512_pternlog_q_256;
4082 else if (VecWidth == 512 && EltWidth == 64)
4083 IID = Intrinsic::x86_avx512_pternlog_q_512;
4084 else
4085 llvm_unreachable("Unexpected intrinsic");
4086
4087 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4088 CI->getArgOperand(2), CI->getArgOperand(3)};
4089 Rep = Builder.CreateIntrinsic(IID, Args);
4090 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4091 : CI->getArgOperand(0);
4092 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4093 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4094 Name.starts_with("avx512.maskz.vpmadd52")) {
4095 bool ZeroMask = Name[11] == 'z';
4096 bool High = Name[20] == 'h' || Name[21] == 'h';
4097 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4098 Intrinsic::ID IID;
4099 if (VecWidth == 128 && !High)
4100 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4101 else if (VecWidth == 256 && !High)
4102 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4103 else if (VecWidth == 512 && !High)
4104 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4105 else if (VecWidth == 128 && High)
4106 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4107 else if (VecWidth == 256 && High)
4108 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4109 else if (VecWidth == 512 && High)
4110 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4111 else
4112 llvm_unreachable("Unexpected intrinsic");
4113
4114 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4115 CI->getArgOperand(2)};
4116 Rep = Builder.CreateIntrinsic(IID, Args);
4117 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4118 : CI->getArgOperand(0);
4119 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4120 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4121 Name.starts_with("avx512.mask.vpermt2var.") ||
4122 Name.starts_with("avx512.maskz.vpermt2var.")) {
4123 bool ZeroMask = Name[11] == 'z';
4124 bool IndexForm = Name[17] == 'i';
4125 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4126 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4127 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4128 Name.starts_with("avx512.mask.vpdpbusds.") ||
4129 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4130 bool ZeroMask = Name[11] == 'z';
4131 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4132 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4133 Intrinsic::ID IID;
4134 if (VecWidth == 128 && !IsSaturating)
4135 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4136 else if (VecWidth == 256 && !IsSaturating)
4137 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4138 else if (VecWidth == 512 && !IsSaturating)
4139 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4140 else if (VecWidth == 128 && IsSaturating)
4141 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4142 else if (VecWidth == 256 && IsSaturating)
4143 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4144 else if (VecWidth == 512 && IsSaturating)
4145 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4146 else
4147 llvm_unreachable("Unexpected intrinsic");
4148
4149 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4150 CI->getArgOperand(2)};
4151 Rep = Builder.CreateIntrinsic(IID, Args);
4152 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4153 : CI->getArgOperand(0);
4154 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4155 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4156 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4157 Name.starts_with("avx512.mask.vpdpwssds.") ||
4158 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4159 bool ZeroMask = Name[11] == 'z';
4160 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4161 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4162 Intrinsic::ID IID;
4163 if (VecWidth == 128 && !IsSaturating)
4164 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4165 else if (VecWidth == 256 && !IsSaturating)
4166 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4167 else if (VecWidth == 512 && !IsSaturating)
4168 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4169 else if (VecWidth == 128 && IsSaturating)
4170 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4171 else if (VecWidth == 256 && IsSaturating)
4172 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4173 else if (VecWidth == 512 && IsSaturating)
4174 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4175 else
4176 llvm_unreachable("Unexpected intrinsic");
4177
4178 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4179 CI->getArgOperand(2)};
4180 Rep = Builder.CreateIntrinsic(IID, Args);
4181 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4182 : CI->getArgOperand(0);
4183 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4184 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4185 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4186 Name == "subborrow.u32" || Name == "subborrow.u64") {
4187 Intrinsic::ID IID;
4188 if (Name[0] == 'a' && Name.back() == '2')
4189 IID = Intrinsic::x86_addcarry_32;
4190 else if (Name[0] == 'a' && Name.back() == '4')
4191 IID = Intrinsic::x86_addcarry_64;
4192 else if (Name[0] == 's' && Name.back() == '2')
4193 IID = Intrinsic::x86_subborrow_32;
4194 else if (Name[0] == 's' && Name.back() == '4')
4195 IID = Intrinsic::x86_subborrow_64;
4196 else
4197 llvm_unreachable("Unexpected intrinsic");
4198
4199 // Make a call with 3 operands.
4200 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4201 CI->getArgOperand(2)};
4202 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4203
4204 // Extract the second result and store it.
4205 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4206 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4207 // Replace the original call result with the first result of the new call.
4208 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4209
4210 CI->replaceAllUsesWith(CF);
4211 Rep = nullptr;
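  // Sketch of the expansion (illustrative values): addcarry.u32 becomes
  //   %res = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
  //   %sum = extractvalue { i8, i32 } %res, 1
  //   store i32 %sum, ptr %p, align 1
  // and all uses of the original call are replaced by the carry flag
  // (field 0), so Rep is intentionally left null here.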
4212 } else if (Name.starts_with("avx512.mask.") &&
4213 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4214 // Rep will be updated by the call in the condition.
4215 }
4216
4217 return Rep;
4218}
4219
4220 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4221 Function *F, IRBuilder<> &Builder) {
4222 if (Name.starts_with("neon.bfcvt")) {
4223 if (Name.starts_with("neon.bfcvtn2")) {
4224 SmallVector<int, 32> LoMask(4);
4225 std::iota(LoMask.begin(), LoMask.end(), 0);
4226 SmallVector<int, 32> ConcatMask(8);
4227 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4228 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4229 Value *Trunc =
4230 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4231 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4232 } else if (Name.starts_with("neon.bfcvtn")) {
4233 SmallVector<int, 32> ConcatMask(8);
4234 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4235 Type *V4BF16 =
4236 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4237 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4239 return Builder.CreateShuffleVector(
4240 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4241 } else {
4242 return Builder.CreateFPTrunc(CI->getOperand(0),
4243 Type::getBFloatTy(F->getContext()));
4244 }
4245 } else if (Name.starts_with("sve.fcvt")) {
4246 Intrinsic::ID NewID =
4248 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4249 .Case("sve.fcvtnt.bf16f32",
4250 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4251 .Default(Intrinsic::not_intrinsic);
4252 if (NewID == Intrinsic::not_intrinsic)
4253 llvm_unreachable("Unhandled Intrinsic!");
4254
4255 SmallVector<Value *, 3> Args(CI->args());
4256
4257 // The original intrinsics incorrectly used a predicate based on the
4258 // smallest element type rather than the largest.
4259 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4260 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4261
4262 if (Args[1]->getType() != BadPredTy)
4263 llvm_unreachable("Unexpected predicate type!");
4264
4265 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4266 BadPredTy, Args[1]);
4267 Args[1] = Builder.CreateIntrinsic(
4268 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4269
4270 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4271 CI->getName());
4272 }
4273
4274 llvm_unreachable("Unhandled Intrinsic!");
4275}
4276
4277 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4278 IRBuilder<> &Builder) {
4279 if (Name == "mve.vctp64.old") {
4280 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4281 // correct type.
4282 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4283 CI->getArgOperand(0),
4284 /*FMFSource=*/nullptr, CI->getName());
4285 Value *C1 = Builder.CreateIntrinsic(
4286 Intrinsic::arm_mve_pred_v2i,
4287 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4288 return Builder.CreateIntrinsic(
4289 Intrinsic::arm_mve_pred_i2v,
4290 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
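  // Sketch (illustrative): the legacy intrinsic produced <4 x i1>, while
  // the new vctp64 produces <2 x i1>; the result is therefore converted to
  // an integer predicate (arm.mve.pred.v2i) and back (arm.mve.pred.i2v) to
  // recover the old <4 x i1> type expected by existing users.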
4291 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4292 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4293 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4294 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4295 Name ==
4296 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4297 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4298 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4299 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4300 Name ==
4301 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4302 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4303 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4304 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4305 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4306 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4307 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4308 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4309 std::vector<Type *> Tys;
4310 unsigned ID = CI->getIntrinsicID();
4311 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4312 switch (ID) {
4313 case Intrinsic::arm_mve_mull_int_predicated:
4314 case Intrinsic::arm_mve_vqdmull_predicated:
4315 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4316 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4317 break;
4318 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4319 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4320 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4321 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4322 V2I1Ty};
4323 break;
4324 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4325 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4326 CI->getOperand(1)->getType(), V2I1Ty};
4327 break;
4328 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4329 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4330 CI->getOperand(2)->getType(), V2I1Ty};
4331 break;
4332 case Intrinsic::arm_cde_vcx1q_predicated:
4333 case Intrinsic::arm_cde_vcx1qa_predicated:
4334 case Intrinsic::arm_cde_vcx2q_predicated:
4335 case Intrinsic::arm_cde_vcx2qa_predicated:
4336 case Intrinsic::arm_cde_vcx3q_predicated:
4337 case Intrinsic::arm_cde_vcx3qa_predicated:
4338 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4339 break;
4340 default:
4341 llvm_unreachable("Unhandled Intrinsic!");
4342 }
4343
4344 std::vector<Value *> Ops;
4345 for (Value *Op : CI->args()) {
4346 Type *Ty = Op->getType();
4347 if (Ty->getScalarSizeInBits() == 1) {
4348 Value *C1 = Builder.CreateIntrinsic(
4349 Intrinsic::arm_mve_pred_v2i,
4350 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4351 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4352 }
4353 Ops.push_back(Op);
4354 }
4355
4356 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4357 CI->getName());
4358 }
4359 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4360}
4361
4362// These are expected to have the arguments:
4363// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4364//
4365// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4366//
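// As a hedged example (operand values are illustrative), a call such as
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// is rewritten below to roughly
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst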
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4372 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4373 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4374 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4375 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4376 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4377 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4378 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4379 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4380 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4381 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4382
4383 unsigned NumOperands = CI->getNumOperands();
4384 if (NumOperands < 3) // Malformed bitcode.
4385 return nullptr;
4386
4387 Value *Ptr = CI->getArgOperand(0);
4388 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4389 if (!PtrTy) // Malformed.
4390 return nullptr;
4391
4392 Value *Val = CI->getArgOperand(1);
4393 if (Val->getType() != CI->getType()) // Malformed.
4394 return nullptr;
4395
4396 ConstantInt *OrderArg = nullptr;
4397 bool IsVolatile = false;
4398
  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16, which was missing these arguments.
4401 if (NumOperands > 3)
4402 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4403
  // Ignore the scope argument at index 3.
4405
4406 if (NumOperands > 5) {
4407 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4408 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4409 }
4410
4411 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4412 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4413 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4414 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4415 Order = AtomicOrdering::SequentiallyConsistent;
4416
4417 LLVMContext &Ctx = F->getContext();
4418
  // Handle the v2bf16 intrinsic, which used <2 x i16> instead of <2 x bfloat>.
4420 Type *RetTy = CI->getType();
4421 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4422 if (VT->getElementType()->isIntegerTy(16)) {
4423 VectorType *AsBF16 =
4424 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4425 Val = Builder.CreateBitCast(Val, AsBF16);
4426 }
4427 }
4428
4429 // The scope argument never really worked correctly. Use agent as the most
4430 // conservative option which should still always produce the instruction.
4431 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4432 AtomicRMWInst *RMW =
4433 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4434
4435 unsigned AddrSpace = PtrTy->getAddressSpace();
4436 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4437 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4438 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4439 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4440 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4441 }
4442
4443 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4444 MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4449 }
4450
4451 if (IsVolatile)
4452 RMW->setVolatile(true);
4453
4454 return Builder.CreateBitCast(RMW, RetTy);
4455}
4456
4457/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4458/// plain MDNode, as it's the verifier's job to check these are the correct
4459/// types later.
4460static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4461 if (Op < CI->arg_size()) {
4462 if (MetadataAsValue *MAV =
4463 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4464 Metadata *MD = MAV->getMetadata();
4465 return dyn_cast_if_present<MDNode>(MD);
4466 }
4467 }
4468 return nullptr;
4469}
4470
/// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as
/// the Value field.
4472static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4473 if (Op < CI->arg_size())
4474 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4475 return MAV->getMetadata();
4476 return nullptr;
4477}
4478
static MDNode *getDebugLocSafe(const Instruction *I) {
  // The MDNode attached to this instruction might not be the correct type,
  // as the verifier has not yet been run. Fetch it as a bare MDNode.
4482 return I->getDebugLoc().getAsMDNode();
4483}
4484
4485/// Convert debug intrinsic calls to non-instruction debug records.
4486/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4487/// \p CI - The debug intrinsic call.
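/// For example (illustrative only), a call such as
///   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
///                             metadata !DIExpression())
/// becomes an equivalent #dbg_value record inserted before the call's
/// position in the block.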
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
4490 if (Name == "label") {
    DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
                                                        CI->getDebugLoc());
4493 } else if (Name == "assign") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4496 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4497 unwrapMAVMetadataOp(CI, 4),
        /* The address is a Value ref; it will be stored as Metadata. */
4499 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4500 } else if (Name == "declare") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4503 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4504 getDebugLocSafe(CI));
4505 } else if (Name == "addr") {
4506 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4507 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4508 // Don't try to add something to the expression if it's not an expression.
4509 // Instead, allow the verifier to fail later.
4510 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4511 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4512 }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4515 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4516 getDebugLocSafe(CI));
4517 } else if (Name == "value") {
4518 // An old version of dbg.value had an extra offset argument.
4519 unsigned VarOp = 1;
4520 unsigned ExprOp = 2;
4521 if (CI->arg_size() == 4) {
4522 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4523 // Nonzero offset dbg.values get dropped without a replacement.
4524 if (!Offset || !Offset->isZeroValue())
4525 return;
4526 VarOp = 2;
4527 ExprOp = 3;
4528 }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4531 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4532 nullptr, getDebugLocSafe(CI));
4533 }
4534 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4535 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4536}
4537
4538/// Upgrade a call to an old intrinsic. All argument and return casting must be
4539/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note that dyn_cast to Function is not quite the same as getCalledFunction,
  // which checks that the callee's function type matches. It's likely we need
  // to handle type changes here.
4544 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4545 if (!F)
4546 return;
4547
4548 LLVMContext &C = CI->getContext();
4549 IRBuilder<> Builder(C);
4550 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4551
4552 if (!NewFn) {
4553 // Get the Function's name.
4554 StringRef Name = F->getName();
4555
4556 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4557 Name = Name.substr(5);
4558
4559 bool IsX86 = Name.consume_front("x86.");
4560 bool IsNVVM = Name.consume_front("nvvm.");
4561 bool IsAArch64 = Name.consume_front("aarch64.");
4562 bool IsARM = Name.consume_front("arm.");
4563 bool IsAMDGCN = Name.consume_front("amdgcn.");
4564 bool IsDbg = Name.consume_front("dbg.");
4565 Value *Rep = nullptr;
4566
4567 if (!IsX86 && Name == "stackprotectorcheck") {
4568 Rep = nullptr;
4569 } else if (IsNVVM) {
4570 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4571 } else if (IsX86) {
4572 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4573 } else if (IsAArch64) {
4574 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4575 } else if (IsARM) {
4576 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4577 } else if (IsAMDGCN) {
4578 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4579 } else if (IsDbg) {
      upgradeDbgIntrinsicToDbgRecord(Name, CI);
    } else {
4582 llvm_unreachable("Unknown function for CallBase upgrade.");
4583 }
4584
4585 if (Rep)
4586 CI->replaceAllUsesWith(Rep);
4587 CI->eraseFromParent();
4588 return;
4589 }
4590
4591 const auto &DefaultCase = [&]() -> void {
4592 if (F == NewFn)
4593 return;
4594
4595 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4596 // Handle generic mangling change.
4597 assert(
4598 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4599 "Unknown function for CallBase upgrade and isn't just a name change");
4600 CI->setCalledFunction(NewFn);
4601 return;
4602 }
4603
4604 // This must be an upgrade from a named to a literal struct.
4605 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4606 assert(OldST != NewFn->getReturnType() &&
4607 "Return type must have changed");
4608 assert(OldST->getNumElements() ==
4609 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4610 "Must have same number of elements");
4611
4612 SmallVector<Value *> Args(CI->args());
4613 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4614 NewCI->setAttributes(CI->getAttributes());
4615 Value *Res = PoisonValue::get(OldST);
4616 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4617 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4618 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4619 }
4620 CI->replaceAllUsesWith(Res);
4621 CI->eraseFromParent();
4622 return;
4623 }
4624
4625 // We're probably about to produce something invalid. Let the verifier catch
4626 // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
    return;
4630 };
4631 CallInst *NewCall = nullptr;
4632 switch (NewFn->getIntrinsicID()) {
4633 default: {
4634 DefaultCase();
4635 return;
4636 }
4637 case Intrinsic::arm_neon_vst1:
4638 case Intrinsic::arm_neon_vst2:
4639 case Intrinsic::arm_neon_vst3:
4640 case Intrinsic::arm_neon_vst4:
4641 case Intrinsic::arm_neon_vst2lane:
4642 case Intrinsic::arm_neon_vst3lane:
4643 case Intrinsic::arm_neon_vst4lane: {
4644 SmallVector<Value *, 4> Args(CI->args());
4645 NewCall = Builder.CreateCall(NewFn, Args);
4646 break;
4647 }
4648 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4649 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4650 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4651 LLVMContext &Ctx = F->getParent()->getContext();
4652 SmallVector<Value *, 4> Args(CI->args());
4653 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4654 cast<ConstantInt>(Args[3])->getZExtValue());
4655 NewCall = Builder.CreateCall(NewFn, Args);
4656 break;
4657 }
4658 case Intrinsic::aarch64_sve_ld3_sret:
4659 case Intrinsic::aarch64_sve_ld4_sret:
4660 case Intrinsic::aarch64_sve_ld2_sret: {
4661 StringRef Name = F->getName();
4662 Name = Name.substr(5);
4663 unsigned N = StringSwitch<unsigned>(Name)
4664 .StartsWith("aarch64.sve.ld2", 2)
4665 .StartsWith("aarch64.sve.ld3", 3)
4666 .StartsWith("aarch64.sve.ld4", 4)
4667 .Default(0);
4668 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4669 unsigned MinElts = RetTy->getMinNumElements() / N;
4670 SmallVector<Value *, 2> Args(CI->args());
4671 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
4674 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4675 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4676 }
4677 NewCall = dyn_cast<CallInst>(Ret);
4678 break;
4679 }
4680
4681 case Intrinsic::coro_end: {
4682 SmallVector<Value *, 3> Args(CI->args());
4683 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4684 NewCall = Builder.CreateCall(NewFn, Args);
4685 break;
4686 }
4687
4688 case Intrinsic::vector_extract: {
4689 StringRef Name = F->getName();
4690 Name = Name.substr(5); // Strip llvm
4691 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4692 DefaultCase();
4693 return;
4694 }
4695 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4696 unsigned MinElts = RetTy->getMinNumElements();
4697 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4698 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4699 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4700 break;
4701 }
4702
4703 case Intrinsic::vector_insert: {
4704 StringRef Name = F->getName();
4705 Name = Name.substr(5);
4706 if (!Name.starts_with("aarch64.sve.tuple")) {
4707 DefaultCase();
4708 return;
4709 }
4710 if (Name.starts_with("aarch64.sve.tuple.set")) {
4711 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4712 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4713 Value *NewIdx =
4714 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4715 NewCall = Builder.CreateCall(
4716 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4717 break;
4718 }
4719 if (Name.starts_with("aarch64.sve.tuple.create")) {
4720 unsigned N = StringSwitch<unsigned>(Name)
4721 .StartsWith("aarch64.sve.tuple.create2", 2)
4722 .StartsWith("aarch64.sve.tuple.create3", 3)
4723 .StartsWith("aarch64.sve.tuple.create4", 4)
4724 .Default(0);
4725 assert(N > 1 && "Create is expected to be between 2-4");
4726 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
4729 for (unsigned I = 0; I < N; I++) {
4730 Value *V = CI->getArgOperand(I);
4731 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4732 }
4733 NewCall = dyn_cast<CallInst>(Ret);
4734 }
4735 break;
4736 }
4737
4738 case Intrinsic::arm_neon_bfdot:
4739 case Intrinsic::arm_neon_bfmmla:
4740 case Intrinsic::arm_neon_bfmlalb:
4741 case Intrinsic::arm_neon_bfmlalt:
4742 case Intrinsic::aarch64_neon_bfdot:
4743 case Intrinsic::aarch64_neon_bfmmla:
4744 case Intrinsic::aarch64_neon_bfmlalb:
4745 case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
4748 "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
4752 "Unexpected operand width");
4753 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4754 auto Iter = CI->args().begin();
4755 Args.push_back(*Iter++);
4756 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4757 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4758 NewCall = Builder.CreateCall(NewFn, Args);
4759 break;
4760 }
4761
4762 case Intrinsic::bitreverse:
4763 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4764 break;
4765
4766 case Intrinsic::ctlz:
4767 case Intrinsic::cttz:
4768 assert(CI->arg_size() == 1 &&
4769 "Mismatch between function args and call args");
4770 NewCall =
4771 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4772 break;
4773
4774 case Intrinsic::objectsize: {
4775 Value *NullIsUnknownSize =
4776 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4777 Value *Dynamic =
4778 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4779 NewCall = Builder.CreateCall(
4780 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4781 break;
4782 }
4783
4784 case Intrinsic::ctpop:
4785 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4786 break;
4787
4788 case Intrinsic::convert_from_fp16:
4789 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4790 break;
4791
4792 case Intrinsic::dbg_value: {
4793 StringRef Name = F->getName();
4794 Name = Name.substr(5); // Strip llvm.
4795 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4796 if (Name.starts_with("dbg.addr")) {
4797 DIExpression *Expr = cast<DIExpression>(
4798 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4799 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4800 NewCall =
4801 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4802 MetadataAsValue::get(C, Expr)});
4803 break;
4804 }
4805
4806 // Upgrade from the old version that had an extra offset argument.
4807 assert(CI->arg_size() == 4);
4808 // Drop nonzero offsets instead of attempting to upgrade them.
4809 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4810 if (Offset->isZeroValue()) {
4811 NewCall = Builder.CreateCall(
4812 NewFn,
4813 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4814 break;
4815 }
4816 CI->eraseFromParent();
4817 return;
4818 }
4819
4820 case Intrinsic::ptr_annotation:
4821 // Upgrade from versions that lacked the annotation attribute argument.
4822 if (CI->arg_size() != 4) {
4823 DefaultCase();
4824 return;
4825 }
4826
4827 // Create a new call with an added null annotation attribute argument.
4828 NewCall = Builder.CreateCall(
4829 NewFn,
4830 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4831 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4832 NewCall->takeName(CI);
4833 CI->replaceAllUsesWith(NewCall);
4834 CI->eraseFromParent();
4835 return;
4836
4837 case Intrinsic::var_annotation:
4838 // Upgrade from versions that lacked the annotation attribute argument.
4839 if (CI->arg_size() != 4) {
4840 DefaultCase();
4841 return;
4842 }
4843 // Create a new call with an added null annotation attribute argument.
4844 NewCall = Builder.CreateCall(
4845 NewFn,
4846 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4847 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4848 NewCall->takeName(CI);
4849 CI->replaceAllUsesWith(NewCall);
4850 CI->eraseFromParent();
4851 return;
4852
4853 case Intrinsic::riscv_aes32dsi:
4854 case Intrinsic::riscv_aes32dsmi:
4855 case Intrinsic::riscv_aes32esi:
4856 case Intrinsic::riscv_aes32esmi:
4857 case Intrinsic::riscv_sm4ks:
4858 case Intrinsic::riscv_sm4ed: {
4859 // The last argument to these intrinsics used to be i8 and changed to i32.
4860 // The type overload for sm4ks and sm4ed was removed.
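    // A hedged illustration: 32-bit bitcode containing
    //   call i32 @llvm.riscv.sm4ks.i32(i32 %a, i32 %b, i8 2)
    // becomes
    //   call i32 @llvm.riscv.sm4ks(i32 %a, i32 %b, i32 2)
    // while i64-typed RV64 bitcode additionally truncates and sign-extends.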
4861 Value *Arg2 = CI->getArgOperand(2);
4862 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4863 return;
4864
4865 Value *Arg0 = CI->getArgOperand(0);
4866 Value *Arg1 = CI->getArgOperand(1);
4867 if (CI->getType()->isIntegerTy(64)) {
4868 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4869 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4870 }
4871
4872 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4873 cast<ConstantInt>(Arg2)->getZExtValue());
4874
4875 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4876 Value *Res = NewCall;
4877 if (Res->getType() != CI->getType())
4878 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4879 NewCall->takeName(CI);
4880 CI->replaceAllUsesWith(Res);
4881 CI->eraseFromParent();
4882 return;
4883 }
4884 case Intrinsic::nvvm_mapa_shared_cluster: {
4885 // Create a new call with the correct address space.
4886 NewCall =
4887 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
4888 Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
    NewCall->takeName(CI);
4892 CI->replaceAllUsesWith(Res);
4893 CI->eraseFromParent();
4894 return;
4895 }
4896 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4897 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4898 // Create a new call with the correct address space.
4899 SmallVector<Value *, 4> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

4903 NewCall = Builder.CreateCall(NewFn, Args);
4904 NewCall->takeName(CI);
4905 CI->replaceAllUsesWith(NewCall);
4906 CI->eraseFromParent();
4907 return;
4908 }
4909 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4910 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4911 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4912 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4913 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4914 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4915 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4916 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4917 SmallVector<Value *, 16> Args(CI->args());
4918
4919 // Create AddrSpaceCast to shared_cluster if needed.
4920 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
4921 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
    if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
      Args[0] = Builder.CreateAddrSpaceCast(
          Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

4926 // Attach the flag argument for cta_group, with a
4927 // default value of 0. This handles case (2) in
4928 // shouldUpgradeNVPTXTMAG2SIntrinsics().
4929 size_t NumArgs = CI->arg_size();
4930 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
4931 if (!FlagArg->getType()->isIntegerTy(1))
4932 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
4933
4934 NewCall = Builder.CreateCall(NewFn, Args);
4935 NewCall->takeName(CI);
4936 CI->replaceAllUsesWith(NewCall);
4937 CI->eraseFromParent();
4938 return;
4939 }
4940 case Intrinsic::riscv_sha256sig0:
4941 case Intrinsic::riscv_sha256sig1:
4942 case Intrinsic::riscv_sha256sum0:
4943 case Intrinsic::riscv_sha256sum1:
4944 case Intrinsic::riscv_sm3p0:
4945 case Intrinsic::riscv_sm3p1: {
    // These intrinsics used to be overloaded on their operand type (i32/i64).
    // The type overload was removed; for i64 bitcode the argument is truncated
    // to i32 and the result sign-extended back to i64.
4948 if (!CI->getType()->isIntegerTy(64))
4949 return;
4950
4951 Value *Arg =
4952 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4953
4954 NewCall = Builder.CreateCall(NewFn, Arg);
4955 Value *Res =
4956 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4957 NewCall->takeName(CI);
4958 CI->replaceAllUsesWith(Res);
4959 CI->eraseFromParent();
4960 return;
4961 }
4962
4963 case Intrinsic::x86_xop_vfrcz_ss:
4964 case Intrinsic::x86_xop_vfrcz_sd:
4965 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4966 break;
4967
4968 case Intrinsic::x86_xop_vpermil2pd:
4969 case Intrinsic::x86_xop_vpermil2ps:
4970 case Intrinsic::x86_xop_vpermil2pd_256:
4971 case Intrinsic::x86_xop_vpermil2ps_256: {
4972 SmallVector<Value *, 4> Args(CI->args());
4973 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4974 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4975 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4976 NewCall = Builder.CreateCall(NewFn, Args);
4977 break;
4978 }
4979
4980 case Intrinsic::x86_sse41_ptestc:
4981 case Intrinsic::x86_sse41_ptestz:
4982 case Intrinsic::x86_sse41_ptestnzc: {
4983 // The arguments for these intrinsics used to be v4f32, and changed
4984 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4985 // So, the only thing required is a bitcast for both arguments.
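    // e.g. (illustrative):
    //   %r = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
    // becomes a ptestz call on two <2 x i64> bitcasts of %a and %b.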
4986 // First, check the arguments have the old type.
4987 Value *Arg0 = CI->getArgOperand(0);
4988 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4989 return;
4990
4991 // Old intrinsic, add bitcasts
4992 Value *Arg1 = CI->getArgOperand(1);
4993
4994 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4995
4996 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4997 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4998
4999 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5000 break;
5001 }
5002
5003 case Intrinsic::x86_rdtscp: {
    // This used to take one argument. If we have no arguments, it is already
    // upgraded.
5006 if (CI->getNumOperands() == 0)
5007 return;
5008
5009 NewCall = Builder.CreateCall(NewFn);
5010 // Extract the second result and store it.
5011 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5012 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5013 // Replace the original call result with the first result of the new call.
5014 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5015
5016 NewCall->takeName(CI);
5017 CI->replaceAllUsesWith(TSC);
5018 CI->eraseFromParent();
5019 return;
5020 }
5021
5022 case Intrinsic::x86_sse41_insertps:
5023 case Intrinsic::x86_sse41_dppd:
5024 case Intrinsic::x86_sse41_dpps:
5025 case Intrinsic::x86_sse41_mpsadbw:
5026 case Intrinsic::x86_avx_dp_ps_256:
5027 case Intrinsic::x86_avx2_mpsadbw: {
5028 // Need to truncate the last argument from i32 to i8 -- this argument models
5029 // an inherently 8-bit immediate operand to these x86 instructions.
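    // A hedged example: @llvm.x86.sse41.insertps(<4 x float> %a,
    // <4 x float> %b, i32 16) becomes the same call with a trailing i8 16.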
5030 SmallVector<Value *, 4> Args(CI->args());
5031
5032 // Replace the last argument with a trunc.
5033 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5034 NewCall = Builder.CreateCall(NewFn, Args);
5035 break;
5036 }
5037
5038 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5039 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5040 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5041 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5042 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5043 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5044 SmallVector<Value *, 4> Args(CI->args());
5045 unsigned NumElts =
5046 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5047 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5048
5049 NewCall = Builder.CreateCall(NewFn, Args);
5050 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5051
5052 NewCall->takeName(CI);
5053 CI->replaceAllUsesWith(Res);
5054 CI->eraseFromParent();
5055 return;
5056 }
5057
5058 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5059 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5060 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5061 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5062 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5063 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5064 SmallVector<Value *, 4> Args(CI->args());
5065 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5066 if (NewFn->getIntrinsicID() ==
5067 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5068 Args[1] = Builder.CreateBitCast(
5069 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5070
5071 NewCall = Builder.CreateCall(NewFn, Args);
5072 Value *Res = Builder.CreateBitCast(
5073 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5074
5075 NewCall->takeName(CI);
5076 CI->replaceAllUsesWith(Res);
5077 CI->eraseFromParent();
5078 return;
5079 }
5080 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5081 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5083 SmallVector<Value *, 4> Args(CI->args());
5084 unsigned NumElts =
5085 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5086 Args[1] = Builder.CreateBitCast(
5087 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5088 Args[2] = Builder.CreateBitCast(
5089 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5090
5091 NewCall = Builder.CreateCall(NewFn, Args);
5092 break;
5093 }
5094
5095 case Intrinsic::thread_pointer: {
5096 NewCall = Builder.CreateCall(NewFn, {});
5097 break;
5098 }
5099
5100 case Intrinsic::memcpy:
5101 case Intrinsic::memmove:
5102 case Intrinsic::memset: {
5103 // We have to make sure that the call signature is what we're expecting.
5104 // We only want to change the old signatures by removing the alignment arg:
5105 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5106 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5107 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5108 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5109 // Note: i8*'s in the above can be any pointer type
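    // After the upgrade the alignment survives as parameter attributes, e.g.
    // (illustrative):
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src,
    //                                    i64 %n, i1 false)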
5110 if (CI->arg_size() != 5) {
5111 DefaultCase();
5112 return;
5113 }
5114 // Remove alignment argument (3), and add alignment attributes to the
5115 // dest/src pointers.
5116 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5117 CI->getArgOperand(2), CI->getArgOperand(4)};
5118 NewCall = Builder.CreateCall(NewFn, Args);
5119 AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5122 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5123 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5124 NewCall->setAttributes(NewAttrs);
5125 auto *MemCI = cast<MemIntrinsic>(NewCall);
5126 // All mem intrinsics support dest alignment.
5127 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5128 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5129 // Memcpy/Memmove also support source alignment.
5130 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5131 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5132 break;
5133 }
5134
5135 case Intrinsic::lifetime_start:
5136 case Intrinsic::lifetime_end: {
5137 if (CI->arg_size() != 2) {
5138 DefaultCase();
5139 return;
5140 }
5141
5142 Value *Ptr = CI->getArgOperand(1);
5143 // Try to strip pointer casts, such that the lifetime works on an alloca.
5144 Ptr = Ptr->stripPointerCasts();
5145 if (isa<AllocaInst>(Ptr)) {
5146 // Don't use NewFn, as we might have looked through an addrspacecast.
5147 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5148 NewCall = Builder.CreateLifetimeStart(Ptr);
5149 else
5150 NewCall = Builder.CreateLifetimeEnd(Ptr);
5151 break;
5152 }
5153
5154 // Otherwise remove the lifetime marker.
5155 CI->eraseFromParent();
5156 return;
5157 }
5158 }
5159 assert(NewCall && "Should have either set this variable or returned through "
5160 "the default case");
5161 NewCall->takeName(CI);
5162 CI->replaceAllUsesWith(NewCall);
5163 CI->eraseFromParent();
5164}
5165
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5168
5169 // Check if this function should be upgraded and get the replacement function
5170 // if there is one.
5171 Function *NewFn;
5172 if (UpgradeIntrinsicFunction(F, NewFn)) {
5173 // Replace all users of the old function with the new function or new
5174 // instructions. This is not a range loop because the call is deleted.
5175 for (User *U : make_early_inc_range(F->users()))
5176 if (CallBase *CB = dyn_cast<CallBase>(U))
5177 UpgradeIntrinsicCall(CB, NewFn);
5178
5179 // Remove old function, no longer used, from the module.
5180 if (F != NewFn)
5181 F->eraseFromParent();
5182 }
5183}
5184
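// As an illustrative example, the old two-operand scalar TBAA tag
//   !3 = !{!"int", !1}
// is rebuilt below into the struct-path form
//   !{!3, !3, i64 0}
// and three-operand scalar tags also carry their immutability flag across.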
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
5187 if (NumOperands == 0)
5188 return &MD; // Invalid, punt to a verifier error.
5189
5190 // Check if the tag uses struct-path aware TBAA format.
5191 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5192 return &MD;
5193
5194 auto &Context = MD.getContext();
5195 if (NumOperands == 3) {
5196 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5197 MDNode *ScalarType = MDNode::get(Context, Elts);
5198 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
5203 return MDNode::get(Context, Elts2);
5204 }
5205 // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD,
                      ConstantAsMetadata::get(
                          Constant::getNullValue(Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
5209}
5210
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
5213 if (Opc != Instruction::BitCast)
5214 return nullptr;
5215
5216 Temp = nullptr;
5217 Type *SrcTy = V->getType();
5218 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5219 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5220 LLVMContext &Context = V->getContext();
5221
5222 // We have no information about target data layout, so we assume that
  // the maximum pointer size is 64 bits.
5224 Type *MidTy = Type::getInt64Ty(Context);
5225 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5226
5227 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5228 }
5229
5230 return nullptr;
5231}
5232
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
5235 return nullptr;
5236
5237 Type *SrcTy = C->getType();
5238 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5239 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5240 LLVMContext &Context = C->getContext();
5241
5242 // We have no information about target data layout, so we assume that
  // the maximum pointer size is 64 bits.
5244 Type *MidTy = Type::getInt64Ty(Context);
5245
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
5248 }
5249
5250 return nullptr;
5251}
5252
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;
5258
5259 // We need to get metadata before the module is verified (i.e., getModuleFlag
5260 // makes assumptions that we haven't verified yet). Carefully extract the flag
5261 // from the metadata.
5262 unsigned Version = 0;
5263 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5264 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5265 if (Flag->getNumOperands() < 3)
5266 return false;
5267 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5268 return K->getString() == "Debug Info Version";
5269 return false;
5270 });
5271 if (OpIt != ModFlags->op_end()) {
5272 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5273 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5274 Version = CI->getZExtValue();
5275 }
5276 }
5277
5278 if (Version == DEBUG_METADATA_VERSION) {
5279 bool BrokenDebugInfo = false;
5280 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5281 report_fatal_error("Broken module found, compilation aborted!");
5282 if (!BrokenDebugInfo)
5283 // Everything is ok.
5284 return false;
5285 else {
5286 // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
5289 }
5290 }
5291 bool Modified = StripDebugInfo(M);
5292 if (Modified && Version != DEBUG_METADATA_VERSION) {
5293 // Diagnose a version mismatch.
5294 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5295 M.getContext().diagnose(DiagVersion);
5296 }
5297 return Modified;
5298}
5299
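// For example (an illustrative sketch), upgrading !{ptr @f, !"maxntidx", i32 128}
// followed by !{ptr @f, !"maxntidy", i32 2} accumulates into the single function
// attribute "nvvm.maxntid"="128,2" on @f.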
5300static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5301 GlobalValue *GV, const Metadata *V) {
5302 Function *F = cast<Function>(GV);
5303
5304 constexpr StringLiteral DefaultValue = "1";
5305 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5306 unsigned Length = 0;
5307
5308 if (F->hasFnAttribute(Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
    // parse these elements, placing them into Vect3.
5311 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5312 for (; Length < 3 && !S.empty(); Length++) {
5313 auto [Part, Rest] = S.split(',');
5314 Vect3[Length] = Part.trim();
5315 S = Rest;
5316 }
5317 }
5318
5319 const unsigned Dim = DimC - 'x';
5320 assert(Dim < 3 && "Unexpected dim char");
5321
5322 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5323
  // Local variable required for the StringRefs in Vect3 to point to.
5325 const std::string VStr = llvm::utostr(VInt);
5326 Vect3[Dim] = VStr;
5327 Length = std::max(Length, Dim + 1);
5328
5329 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5330 F->addFnAttr(Attr, NewAttr);
5331}
5332
5333static inline bool isXYZ(StringRef S) {
5334 return S == "x" || S == "y" || S == "z";
5335}
5336
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
5339 if (K == "kernel") {
5340 if (!mdconst::extract<ConstantInt>(V)->isZero())
5341 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5342 return true;
5343 }
5344 if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in the low 16 bits; the index is specified in the high bits.
    // For the index, 0 indicates the return value while higher values
    // correspond to each parameter (idx = param + 1).
5349 const uint64_t AlignIdxValuePair =
5350 mdconst::extract<ConstantInt>(V)->getZExtValue();
5351 const unsigned Idx = (AlignIdxValuePair >> 16);
5352 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5353 cast<Function>(GV)->addAttributeAtIndex(
5354 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5355 return true;
5356 }
5357 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5358 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5359 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5360 return true;
5361 }
5362 if (K == "minctasm") {
5363 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5364 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5365 return true;
5366 }
5367 if (K == "maxnreg") {
5368 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5369 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5370 return true;
5371 }
5372 if (K.consume_front("maxntid") && isXYZ(K)) {
5373 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5374 return true;
5375 }
5376 if (K.consume_front("reqntid") && isXYZ(K)) {
5377 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5378 return true;
5379 }
5380 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5381 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5382 return true;
5383 }
5384 if (K == "grid_constant") {
5385 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5386 for (const auto &Op : cast<MDNode>(V)->operands()) {
5387 // For some reason, the index is 1-based in the metadata. Good thing we're
5388 // able to auto-upgrade it!
5389 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5390 cast<Function>(GV)->addParamAttr(Index, Attr);
5391 }
5392 return true;
5393 }
5394
5395 return false;
5396}
5397
void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5400 if (!NamedMD)
5401 return;
5402
5403 SmallVector<MDNode *, 8> NewNodes;
  SmallPtrSet<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
5406 if (!SeenNodes.insert(MD).second)
5407 continue;
5408
5409 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5410 if (!GV)
5411 continue;
5412
5413 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5414
5415 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5416 // Each nvvm.annotations metadata entry will be of the following form:
5417 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5418 // start index = 1, to skip the global variable key
    // increment = 2, to skip the value of each property-value pair
5420 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5421 MDString *K = cast<MDString>(MD->getOperand(j));
5422 const MDOperand &V = MD->getOperand(j + 1);
5423 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5424 if (!Upgraded)
5425 NewOperands.append({K, V});
5426 }
5427
5428 if (NewOperands.size() > 1)
5429 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5430 }
5431
5432 NamedMD->clearOperands();
5433 for (MDNode *N : NewNodes)
5434 NamedMD->addOperand(N);
5435}
5436
/// This checks for the objc retain/release marker, which should be upgraded.
/// It returns true if the module is modified.
static bool upgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
5441 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5442 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5443 if (ModRetainReleaseMarker) {
5444 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5445 if (Op) {
5446 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5447 if (ID) {
5448 SmallVector<StringRef, 4> ValueComp;
5449 ID->getString().split(ValueComp, "#");
5450 if (ValueComp.size() == 2) {
5451 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5452 ID = MDString::get(M.getContext(), NewValue);
5453 }
5454 M.addModuleFlag(Module::Error, MarkerKey, ID);
5455 M.eraseNamedMetadata(ModRetainReleaseMarker);
5456 Changed = true;
5457 }
5458 }
5459 }
5460 return Changed;
5461}
5462
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
5465 // intrinsic calls.
5466 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5467 llvm::Intrinsic::ID IntrinsicFunc) {
5468 Function *Fn = M.getFunction(OldFunc);
5469
5470 if (!Fn)
5471 return;
5472
5473 Function *NewFn =
5474 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5475
5476 for (User *U : make_early_inc_range(Fn->users())) {
5477 CallInst *CI = dyn_cast<CallInst>(U);
5478 if (!CI || CI->getCalledFunction() != Fn)
5479 continue;
5480
5481 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5482 FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

5485 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5486 // value to the return type of the old function.
5487 if (NewFuncTy->getReturnType() != CI->getType() &&
5488 !CastInst::castIsValid(Instruction::BitCast, CI,
5489 NewFuncTy->getReturnType()))
5490 continue;
5491
5492 bool InvalidCast = false;
5493
5494 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5495 Value *Arg = CI->getArgOperand(I);
5496
5497 // Bitcast argument to the parameter type of the new function if it's
5498 // not a variadic argument.
5499 if (I < NewFuncTy->getNumParams()) {
5500 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5501 // to the parameter type of the new function.
5502 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5503 NewFuncTy->getParamType(I))) {
5504 InvalidCast = true;
5505 break;
5506 }
5507 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5508 }
5509 Args.push_back(Arg);
5510 }
5511
5512 if (InvalidCast)
5513 continue;
5514
5515 // Create a call instruction that calls the new function.
5516 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5517 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5518 NewCall->takeName(CI);
5519
5520 // Bitcast the return value back to the type of the old call.
5521 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5522
5523 if (!CI->use_empty())
5524 CI->replaceAllUsesWith(NewRetVal);
5525 CI->eraseFromParent();
5526 }
5527
5528 if (Fn->use_empty())
5529 Fn->eraseFromParent();
5530 };
5531
5532 // Unconditionally convert a call to "clang.arc.use" to a call to
5533 // "llvm.objc.clang.arc.use".
5534 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5535
  // Upgrade the retain/release marker. If there is no need to upgrade the
  // marker, that means either the module is already new enough to contain new
  // intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
  if (!upgradeRetainReleaseMarker(M))
    return;
5541
5542 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5543 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5544 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5545 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5546 {"objc_autoreleaseReturnValue",
5547 llvm::Intrinsic::objc_autoreleaseReturnValue},
5548 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5549 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5550 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5551 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5552 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5553 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5554 {"objc_release", llvm::Intrinsic::objc_release},
5555 {"objc_retain", llvm::Intrinsic::objc_retain},
5556 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5557 {"objc_retainAutoreleaseReturnValue",
5558 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5559 {"objc_retainAutoreleasedReturnValue",
5560 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5561 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5562 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5563 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5564 {"objc_unsafeClaimAutoreleasedReturnValue",
5565 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5566 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5567 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5568 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5569 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5570 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5571 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5572 {"objc_arc_annotation_topdown_bbstart",
5573 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5574 {"objc_arc_annotation_topdown_bbend",
5575 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5576 {"objc_arc_annotation_bottomup_bbstart",
5577 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5578 {"objc_arc_annotation_bottomup_bbend",
5579 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5580
5581 for (auto &I : RuntimeFuncs)
5582 UpgradeToIntrinsic(I.first, I.second);
5583}
5584
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5587 if (!ModFlags)
5588 return false;
5589
5590 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5591 bool HasSwiftVersionFlag = false;
5592 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5593 uint32_t SwiftABIVersion;
5594 auto Int8Ty = Type::getInt8Ty(M.getContext());
5595 auto Int32Ty = Type::getInt32Ty(M.getContext());
5596
5597 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5598 MDNode *Op = ModFlags->getOperand(I);
5599 if (Op->getNumOperands() != 3)
5600 continue;
5601 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5602 if (!ID)
5603 continue;
5604 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5605 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5606 Type::getInt32Ty(M.getContext()), B)),
5607 MDString::get(M.getContext(), ID->getString()),
5608 Op->getOperand(2)};
5609 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5610 Changed = true;
5611 };
5612
5613 if (ID->getString() == "Objective-C Image Info Version")
5614 HasObjCFlag = true;
5615 if (ID->getString() == "Objective-C Class Properties")
5616 HasClassProperties = true;
5617 // Upgrade PIC from Error/Max to Min.
5618 if (ID->getString() == "PIC Level") {
5619 if (auto *Behavior =
5620 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5621 uint64_t V = Behavior->getLimitedValue();
5622 if (V == Module::Error || V == Module::Max)
5623 SetBehavior(Module::Min);
5624 }
5625 }
5626 // Upgrade "PIE Level" from Error to Max.
5627 if (ID->getString() == "PIE Level")
5628 if (auto *Behavior =
5629 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5630 if (Behavior->getLimitedValue() == Module::Error)
5631 SetBehavior(Module::Max);
5632
    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields was Error and now it is Min.
5635 if (ID->getString() == "branch-target-enforcement" ||
5636 ID->getString().starts_with("sign-return-address")) {
5637 if (auto *Behavior =
5638 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5639 if (Behavior->getLimitedValue() == Module::Error) {
5640 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5641 Metadata *Ops[3] = {
5642 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5643 Op->getOperand(1), Op->getOperand(2)};
5644 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5645 Changed = true;
5646 }
5647 }
5648 }
5649
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatched
    // module flags that are functionally the same.
5653 if (ID->getString() == "Objective-C Image Info Section") {
5654 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5655 SmallVector<StringRef, 4> ValueComp;
5656 Value->getString().split(ValueComp, " ");
5657 if (ValueComp.size() != 1) {
5658 std::string NewValue;
5659 for (auto &S : ValueComp)
5660 NewValue += S.str();
5661 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5662 MDString::get(M.getContext(), NewValue)};
5663 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5664 Changed = true;
5665 }
5666 }
5667 }
5668
    // IRUpgrader turns an i32-typed "Objective-C Garbage Collection" into an
    // i8 value. If the higher bits are set, it adds a new module flag for
    // Swift info.
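    // A hedged example: an i32 value of 0x05070102 yields "Swift ABI Version"
    // 1, "Swift Major Version" 5, "Swift Minor Version" 7, and an i8 GC value
    // of 2.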
5671 if (ID->getString() == "Objective-C Garbage Collection") {
5672 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5673 if (Md) {
5674 assert(Md->getValue() && "Expected non-empty metadata");
5675 auto Type = Md->getValue()->getType();
5676 if (Type == Int8Ty)
5677 continue;
5678 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5679 if ((Val & 0xff) != Val) {
5680 HasSwiftVersionFlag = true;
5681 SwiftABIVersion = (Val & 0xff00) >> 8;
5682 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5683 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5684 }
5685 Metadata *Ops[3] = {
5686 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5687 Op->getOperand(1),
5688 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5689 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5690 Changed = true;
5691 }
5692 }
5693
5694 if (ID->getString() == "amdgpu_code_object_version") {
5695 Metadata *Ops[3] = {
5696 Op->getOperand(0),
5697 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5698 Op->getOperand(2)};
5699 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5700 Changed = true;
5701 }
5702 }
5703
5704 // "Objective-C Class Properties" is recently added for Objective-C. We
5705 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5706 // flag of value 0, so we can correclty downgrade this flag when trying to
5707 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5708 // this module flag.
5709 if (HasObjCFlag && !HasClassProperties) {
5710 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5711 (uint32_t)0);
5712 Changed = true;
5713 }
5714
5715 if (HasSwiftVersionFlag) {
5716 M.addModuleFlag(Module::Error, "Swift ABI Version",
5717 SwiftABIVersion);
5718 M.addModuleFlag(Module::Error, "Swift Major Version",
5719 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5720 M.addModuleFlag(Module::Error, "Swift Minor Version",
5721 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5722 Changed = true;
5723 }
5724
5725 return Changed;
5726}
5727
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
5730 SmallVector<StringRef, 5> Components;
5731 Section.split(Components, ',');
5732
5733 SmallString<32> Buffer;
5734 raw_svector_ostream OS(Buffer);
5735
5736 for (auto Component : Components)
5737 OS << ',' << Component.trim();
5738
5739 return std::string(OS.str().substr(1));
5740 };
5741
5742 for (auto &GV : M.globals()) {
5743 if (!GV.hasSection())
5744 continue;
5745
5746 StringRef Section = GV.getSection();
5747
5748 if (!Section.starts_with("__DATA, __objc_catlist"))
5749 continue;
5750
5751 // __DATA, __objc_catlist, regular, no_dead_strip
5752 // __DATA,__objc_catlist,regular,no_dead_strip
5753 GV.setSection(TrimSpaces(Section));
5754 }
5755}
5756
5757namespace {
5758// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5759// callsites within a function that did not also have the strictfp attribute.
5760// Since 10.0, if strict FP semantics are needed within a function, the
5761// function must have the strictfp attribute and all calls within the function
5762// must also have the strictfp attribute. This latter restriction is
5763// necessary to prevent unwanted libcall simplification when a function is
5764// being cloned (such as for inlining).
5765//
// The "dangling" strictfp attribute was only used to prevent constant
5767// folding and other libcall simplification. The nobuiltin attribute on the
5768// callsite has the same effect.
5769struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5770 StrictFPUpgradeVisitor() = default;
5771
5772 void visitCallBase(CallBase &Call) {
5773 if (!Call.isStrictFP())
5774 return;
5775 if (isa<ConstrainedFPIntrinsic>(&Call))
5776 return;
5777 // If we get here, the caller doesn't have the strictfp attribute
5778 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5779 Call.removeFnAttr(Attribute::StrictFP);
5780 Call.addFnAttr(Attribute::NoBuiltin);
5781 }
5782};
5783
5784/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5785struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5786 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5787 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5788
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
5791 return;
5792
5793 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5794 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5795 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5796 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5797 }
5798};
5799} // namespace
5800
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
5803 // convert any callsite strictfp attributes to nobuiltin.
5804 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5805 StrictFPUpgradeVisitor SFPV;
5806 SFPV.visit(F);
5807 }
5808
  // Remove all incompatible attributes from the function.
5810 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5811 F.getReturnType(), F.getAttributes().getRetAttrs()));
5812 for (auto &Arg : F.args())
5813 Arg.removeAttrs(
5814 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5815
5816 // Older versions of LLVM treated an "implicit-section-name" attribute
5817 // similarly to directly setting the section on a Function.
5818 if (Attribute A = F.getFnAttribute("implicit-section-name");
5819 A.isValid() && A.isStringAttribute()) {
5820 F.setSection(A.getValueAsString());
5821 F.removeFnAttr("implicit-section-name");
5822 }
5823
5824 if (!F.empty()) {
5825 // For some reason this is called twice, and the first time is before any
5826 // instructions are loaded into the body.
5827
5828 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5829 A.isValid()) {
5830
5831 if (A.getValueAsBool()) {
5832 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5833 Visitor.visit(F);
5834 }
5835
5836 // We will leave behind dead attribute uses on external declarations, but
5837 // clang never added these to declarations anyway.
5838 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5839 }
5840 }
5841}
5842
5843static bool isOldLoopArgument(Metadata *MD) {
5844 auto *T = dyn_cast_or_null<MDTuple>(MD);
5845 if (!T)
5846 return false;
5847 if (T->getNumOperands() < 1)
5848 return false;
5849 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5850 if (!S)
5851 return false;
5852 return S->getString().starts_with("llvm.vectorizer.");
5853}
5854
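// For example, "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count",
// and any other "llvm.vectorizer.X" tag is renamed to "llvm.loop.vectorize.X".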
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
5857 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5858
5859 if (OldTag == "llvm.vectorizer.unroll")
5860 return MDString::get(C, "llvm.loop.interleave.count");
5861
5862 return MDString::get(
5863 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5864 .str());
5865}
5866
static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
5869 if (!T)
5870 return MD;
5871 if (T->getNumOperands() < 1)
5872 return MD;
5873 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5874 if (!OldTag)
5875 return MD;
5876 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5877 return MD;
5878
5879 // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
5882 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5883 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5884 Ops.push_back(T->getOperand(I));
5885
5886 return MDTuple::get(T->getContext(), Ops);
5887}
5888
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
5891 if (!T)
5892 return &N;
5893
5894 if (none_of(T->operands(), isOldLoopArgument))
5895 return &N;
5896
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
5899 for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

5902 return MDTuple::get(T->getContext(), Ops);
5903}
5904
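// As an illustrative sketch: for 64-bit RISC-V the substring "-n64-" is
// rewritten to "-n32:64-", and a pre-GCN AMDGPU layout with no "G" entry gains
// "-G1" (an empty layout becomes "G1").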
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
5907 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5908 // the address space of globals to 1. This does not apply to SPIRV Logical.
5909 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5910 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5911 !DL.contains("-G") && !DL.starts_with("G")) {
5912 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5913 }
5914
5915 if (T.isLoongArch64() || T.isRISCV64()) {
5916 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5917 auto I = DL.find("-n64-");
5918 if (I != StringRef::npos)
5919 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5920 return DL.str();
5921 }
5922
5923 std::string Res = DL.str();
5924 // AMDGCN data layout upgrades.
5925 if (T.isAMDGCN()) {
5926 // Define address spaces for constants.
5927 if (!DL.contains("-G") && !DL.starts_with("G"))
5928 Res.append(Res.empty() ? "G1" : "-G1");
5929
5930 // Add missing non-integral declarations.
5931 // This goes before adding new address spaces to prevent incoherent string
5932 // values.
5933 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5934 Res.append("-ni:7:8:9");
5935 // Update ni:7 to ni:7:8:9.
5936 if (DL.ends_with("ni:7"))
5937 Res.append(":8:9");
5938 if (DL.ends_with("ni:7:8"))
5939 Res.append(":9");
5940
5941 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5942 // resources). An empty data layout has already been upgraded to G1 by now.
5943 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5944 Res.append("-p7:160:256:256:32");
5945 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5946 Res.append("-p8:128:128:128:48");
5947 constexpr StringRef OldP8("-p8:128:128-");
5948 if (DL.contains(OldP8))
5949 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
5950 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5951 Res.append("-p9:192:256:256:32");
5952
5953 return Res;
5954 }
5955
5956 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5957 // If the datalayout matches the expected format, add pointer size address
5958 // spaces to the datalayout.
5959 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5960 if (!DL.contains(AddrSpaces)) {
5961 SmallVector<StringRef, 4> Groups;
5962 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5963 if (R.match(Res, &Groups))
5964 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5965 }
5966 };
5967
5968 // AArch64 data layout upgrades.
5969 if (T.isAArch64()) {
5970 // Add "-Fn32"
5971 if (!DL.empty() && !DL.contains("-Fn32"))
5972 Res.append("-Fn32");
5973 AddPtr32Ptr64AddrSpaces();
5974 return Res;
5975 }
5976
5977 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5978 T.isWasm()) {
5979 // Mips64 with o32 ABI did not add "-i128:128".
5980 // Add "-i128:128"
5981 std::string I64 = "-i64:64";
5982 std::string I128 = "-i128:128";
5983 if (!StringRef(Res).contains(I128)) {
5984 size_t Pos = Res.find(I64);
5985 if (Pos != size_t(-1))
5986 Res.insert(Pos + I64.size(), I128);
5987 }
5988 return Res;
5989 }
5990
5991 if (!T.isX86())
5992 return Res;
5993
5994 AddPtr32Ptr64AddrSpaces();
5995
5996 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5997 // for i128 operations prior to this being reflected in the data layout, and
5998 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5999 // boundaries, so although this is a breaking change, the upgrade is expected
6000 // to fix more IR than it breaks.
6001 // Intel MCU is an exception and uses 4-byte-alignment.
6002 if (!T.isOSIAMCU()) {
6003 std::string I128 = "-i128:128";
6004 if (StringRef Ref = Res; !Ref.contains(I128)) {
6005 SmallVector<StringRef, 4> Groups;
6006 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6007 if (R.match(Res, &Groups))
6008 Res = (Groups[1] + I128 + Groups[3]).str();
6009 }
6010 }
6011
6012 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6013 // Raising the alignment is safe because Clang did not produce f80 values in
6014 // the MSVC environment before this upgrade was added.
6015 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6016 StringRef Ref = Res;
6017 auto I = Ref.find("-f80:32-");
6018 if (I != StringRef::npos)
6019 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6020 }
6021
6022 return Res;
6023}
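// Editorial usage sketch (not part of the upstream source): because
// UpgradeDataLayoutString is a pure string-to-string rewrite, it can be
// exercised directly. The inputs below are illustrative fragments, not
// complete target datalayouts.
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/Support/raw_ostream.h"

void demoDataLayoutUpgrades() {
  // 64-bit RISC-V: the "-n64-" component gains a native i32 ("-n32:64-").
  llvm::errs() << llvm::UpgradeDataLayoutString("e-m:e-i64:64-n64-S128",
                                                "riscv64-unknown-linux-gnu")
               << "\n";
  // x86-64: the p270/p271/p272 address spaces and "-i128:128" are added.
  llvm::errs() << llvm::UpgradeDataLayoutString("e-m:e-p:64:64",
                                                "x86_64-unknown-linux-gnu")
               << "\n";
}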
6024
6025void llvm::UpgradeAttributes(AttrBuilder &B) {
6026 StringRef FramePointer;
6027 Attribute A = B.getAttribute("no-frame-pointer-elim");
6028 if (A.isValid()) {
6029 // The value can be "true" or "false".
6030 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6031 B.removeAttribute("no-frame-pointer-elim");
6032 }
6033 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6034 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6035 if (FramePointer != "all")
6036 FramePointer = "non-leaf";
6037 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6038 }
6039 if (!FramePointer.empty())
6040 B.addAttribute("frame-pointer", FramePointer);
6041
6042 A = B.getAttribute("null-pointer-is-valid");
6043 if (A.isValid()) {
6044 // The value can be "true" or "false".
6045 bool NullPointerIsValid = A.getValueAsString() == "true";
6046 B.removeAttribute("null-pointer-is-valid");
6047 if (NullPointerIsValid)
6048 B.addAttribute(Attribute::NullPointerIsValid);
6049 }
6050}
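// Editorial summary (not part of the upstream source) of the rewrites above:
//   "no-frame-pointer-elim"="true"           -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"          -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" (alone) -> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"           -> null_pointer_is_valid (enum attribute)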
6051
6052void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6053 // clang.arc.attachedcall bundles are now required to have an operand.
6054 // If they don't, it's okay to drop them entirely: when there is an operand,
6055 // the "attachedcall" is meaningful and required, but without an operand,
6056 // it's just a marker NOP. Dropping it merely prevents an optimization.
6057 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6058 return OBD.getTag() == "clang.arc.attachedcall" &&
6059 OBD.inputs().empty();
6060 });
6061}
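// Editorial example (not part of the upstream source), using a hypothetical
// callee @g and marker function @fn:
//   call void @g() [ "clang.arc.attachedcall"(ptr @fn) ]  ; kept, has operand
//   call void @g() [ "clang.arc.attachedcall"() ]         ; dropped, bare marker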