#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
    cl::desc("Disable autoupgrade of debug info"));
  Type *Arg0Type = F->getFunctionType()->getParamType(0);

  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);

  if (F->getReturnType()->isVectorTy())

  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);

  if (F->getReturnType()->getScalarType()->isBFloatTy())

  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
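// The predicates below classify legacy X86 intrinsic names that need
// auto-upgrading. Note that by this point the "llvm.x86." prefix has already
// been stripped, so each check only ever sees the remaining suffix of the
// intrinsic name (e.g. "avx.storeu.ps.256").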
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||
            Name == "cvt.ps2.pd.256" ||
            Name == "cvtdq2.pd.256" ||
            Name == "cvtdq2.ps.256" ||
            Name.starts_with("movnt.") ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vbroadcastf128") ||
            Name.starts_with("vextractf128.") ||
            Name.starts_with("vinsertf128.") ||
            Name.starts_with("vperm2f128.") ||
            Name.starts_with("vpermil."));
  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||
            Name.starts_with("pabs.") ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pblendd.") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name.starts_with("pmax") ||
            Name.starts_with("pmin") ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx") ||
            Name == "pmulu.dq" ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("vbroadcast") ||
            Name == "vbroadcasti128" ||
            Name == "vextracti128" ||
            Name == "vinserti128" ||
            Name == "vperm2i128");
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") ||
              Name.starts_with("and.") ||
              Name.starts_with("andn.") ||
              Name.starts_with("broadcast.s") ||
              Name.starts_with("broadcastf32x4.") ||
              Name.starts_with("broadcastf32x8.") ||
              Name.starts_with("broadcastf64x2.") ||
              Name.starts_with("broadcastf64x4.") ||
              Name.starts_with("broadcasti32x4.") ||
              Name.starts_with("broadcasti32x8.") ||
              Name.starts_with("broadcasti64x2.") ||
              Name.starts_with("broadcasti64x4.") ||
              Name.starts_with("cmp.b") ||
              Name.starts_with("cmp.d") ||
              Name.starts_with("cmp.q") ||
              Name.starts_with("cmp.w") ||
              Name.starts_with("compress.b") ||
              Name.starts_with("compress.d") ||
              Name.starts_with("compress.p") ||
              Name.starts_with("compress.q") ||
              Name.starts_with("compress.store.") ||
              Name.starts_with("compress.w") ||
              Name.starts_with("conflict.") ||
              Name.starts_with("cvtdq2pd.") ||
              Name.starts_with("cvtdq2ps.") ||
              Name == "cvtpd2dq.256" ||
              Name == "cvtpd2ps.256" ||
              Name == "cvtps2pd.128" ||
              Name == "cvtps2pd.256" ||
              Name.starts_with("cvtqq2pd.") ||
              Name == "cvtqq2ps.256" ||
              Name == "cvtqq2ps.512" ||
              Name == "cvttpd2dq.256" ||
              Name == "cvttps2dq.128" ||
              Name == "cvttps2dq.256" ||
              Name.starts_with("cvtudq2pd.") ||
              Name.starts_with("cvtudq2ps.") ||
              Name.starts_with("cvtuqq2pd.") ||
              Name == "cvtuqq2ps.256" ||
              Name == "cvtuqq2ps.512" ||
              Name.starts_with("dbpsadbw.") ||
              Name.starts_with("div.p") ||
              Name.starts_with("expand.b") ||
              Name.starts_with("expand.d") ||
              Name.starts_with("expand.load.") ||
              Name.starts_with("expand.p") ||
              Name.starts_with("expand.q") ||
              Name.starts_with("expand.w") ||
              Name.starts_with("fpclass.p") ||
              Name.starts_with("insert") ||
              Name.starts_with("load.") ||
              Name.starts_with("loadu.") ||
              Name.starts_with("lzcnt.") ||
              Name.starts_with("max.p") ||
              Name.starts_with("min.p") ||
              Name.starts_with("movddup") ||
              Name.starts_with("move.s") ||
              Name.starts_with("movshdup") ||
              Name.starts_with("movsldup") ||
              Name.starts_with("mul.p") ||
              Name.starts_with("or.") ||
              Name.starts_with("pabs.") ||
              Name.starts_with("packssdw.") ||
              Name.starts_with("packsswb.") ||
              Name.starts_with("packusdw.") ||
              Name.starts_with("packuswb.") ||
              Name.starts_with("padd.") ||
              Name.starts_with("padds.") ||
              Name.starts_with("paddus.") ||
              Name.starts_with("palignr.") ||
              Name.starts_with("pand.") ||
              Name.starts_with("pandn.") ||
              Name.starts_with("pavg") ||
              Name.starts_with("pbroadcast") ||
              Name.starts_with("pcmpeq.") ||
              Name.starts_with("pcmpgt.") ||
              Name.starts_with("perm.df.") ||
              Name.starts_with("perm.di.") ||
              Name.starts_with("permvar.") ||
              Name.starts_with("pmaddubs.w.") ||
              Name.starts_with("pmaddw.d.") ||
              Name.starts_with("pmax") ||
              Name.starts_with("pmin") ||
              Name == "pmov.qd.256" ||
              Name == "pmov.qd.512" ||
              Name == "pmov.wb.256" ||
              Name == "pmov.wb.512" ||
              Name.starts_with("pmovsx") ||
              Name.starts_with("pmovzx") ||
              Name.starts_with("pmul.dq.") ||
              Name.starts_with("pmul.hr.sw.") ||
              Name.starts_with("pmulh.w.") ||
              Name.starts_with("pmulhu.w.") ||
              Name.starts_with("pmull.") ||
              Name.starts_with("pmultishift.qb.") ||
              Name.starts_with("pmulu.dq.") ||
              Name.starts_with("por.") ||
              Name.starts_with("prol.") ||
              Name.starts_with("prolv.") ||
              Name.starts_with("pror.") ||
              Name.starts_with("prorv.") ||
              Name.starts_with("pshuf.b.") ||
              Name.starts_with("pshuf.d.") ||
              Name.starts_with("pshufh.w.") ||
              Name.starts_with("pshufl.w.") ||
              Name.starts_with("psll.d") ||
              Name.starts_with("psll.q") ||
              Name.starts_with("psll.w") ||
              Name.starts_with("pslli") ||
              Name.starts_with("psllv") ||
              Name.starts_with("psra.d") ||
              Name.starts_with("psra.q") ||
              Name.starts_with("psra.w") ||
              Name.starts_with("psrai") ||
              Name.starts_with("psrav") ||
              Name.starts_with("psrl.d") ||
              Name.starts_with("psrl.q") ||
              Name.starts_with("psrl.w") ||
              Name.starts_with("psrli") ||
              Name.starts_with("psrlv") ||
              Name.starts_with("psub.") ||
              Name.starts_with("psubs.") ||
              Name.starts_with("psubus.") ||
              Name.starts_with("pternlog.") ||
              Name.starts_with("punpckh") ||
              Name.starts_with("punpckl") ||
              Name.starts_with("pxor.") ||
              Name.starts_with("shuf.f") ||
              Name.starts_with("shuf.i") ||
              Name.starts_with("shuf.p") ||
              Name.starts_with("sqrt.p") ||
              Name.starts_with("store.b.") ||
              Name.starts_with("store.d.") ||
              Name.starts_with("store.p") ||
              Name.starts_with("store.q.") ||
              Name.starts_with("store.w.") ||
              Name == "store.ss" ||
              Name.starts_with("storeu.") ||
              Name.starts_with("sub.p") ||
              Name.starts_with("ucmp.") ||
              Name.starts_with("unpckh.") ||
              Name.starts_with("unpckl.") ||
              Name.starts_with("valign.") ||
              Name == "vcvtph2ps.128" ||
              Name == "vcvtph2ps.256" ||
              Name.starts_with("vextract") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfnmadd.") ||
              Name.starts_with("vfnmsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermi2var.") ||
              Name.starts_with("vpermil.p") ||
              Name.starts_with("vpermilvar.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshld.") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrd.") ||
              Name.starts_with("vpshrdv.") ||
              Name.starts_with("vpshufbitqmb.") ||
              Name.starts_with("xor."));
    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfmsub.") ||
              Name.starts_with("vfmsubadd.") ||
              Name.starts_with("vfnmsub."));

    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") ||
              Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrdv."));
    return (Name == "movntdqa" ||
            Name == "pmul.dq.512" ||
            Name == "pmulu.dq.512" ||
            Name.starts_with("broadcastm") ||
            Name.starts_with("cmp.p") ||
            Name.starts_with("cvtb2mask.") ||
            Name.starts_with("cvtd2mask.") ||
            Name.starts_with("cvtmask2") ||
            Name.starts_with("cvtq2mask.") ||
            Name == "cvtusi2sd" ||
            Name.starts_with("cvtw2mask.") ||
            Name == "kortestc.w" ||
            Name == "kortestz.w" ||
            Name.starts_with("kunpck") ||
            Name.starts_with("padds.") ||
            Name.starts_with("pbroadcast") ||
            Name.starts_with("prol") ||
            Name.starts_with("pror") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("ptestm") ||
            Name.starts_with("ptestnm") ||
            Name.starts_with("storent.") ||
            Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vpshld.") ||
            Name.starts_with("vpshrd."));
  }
  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||
            Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") ||
            Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub."));

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s");
  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||
            Name == "cvtsi2ss" ||
            Name == "cvtsi642ss" ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu."));
  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||
            Name == "cvtdq2pd" ||
            Name == "cvtdq2ps" ||
            Name == "cvtps2pd" ||
            Name == "cvtsi2sd" ||
            Name == "cvtsi642sd" ||
            Name == "cvtss2sd" ||
            Name.starts_with("padds.") ||
            Name.starts_with("paddus.") ||
            Name.starts_with("pcmpeq.") ||
            Name.starts_with("pcmpgt.") ||
            Name == "pmulu.dq" ||
            Name.starts_with("pshuf") ||
            Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") ||
            Name.starts_with("sqrt.p") ||
            Name == "storel.dq" ||
            Name.starts_with("storeu."));
  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") ||
            Name == "movntdqa" ||
            Name.starts_with("pmovsx") ||
            Name.starts_with("pmovzx"));
  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8";

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt.");
  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" ||
            Name == "pabs.d.128" ||
            Name == "pabs.w.128");
  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||
            Name == "vpcmov.256" ||
            Name.starts_with("vpcom") ||
            Name.starts_with("vprot"));
  return (Name == "addcarry.u32" ||
          Name == "addcarry.u64" ||
          Name == "addcarryx.u32" ||
          Name == "addcarryx.u64" ||
          Name == "subborrow.u32" ||
          Name == "subborrow.u64" ||
          Name.starts_with("vcvtph2ps."));
  if (!Name.consume_front("x86."))
    return false;

  if (Name == "rdtscp") {

    if (F->getFunctionType()->getNumParams() == 0)

    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
  if (Name.consume_front("sse41.ptest")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)

    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
  if (Name.consume_front("avx512bf16.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)

    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
  if (Name.consume_front("xop.")) {

    if (Name.starts_with("vpermil2")) {

      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);

  if (Name.starts_with("rbit")) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

  bool Neon = Name.consume_front("neon.");
  if (Name.consume_front("bfdot.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Cases("v2f32.v8i8", "v4f32.v16i8",

    size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");

    std::array<Type *, 2> Tys{

  if (Name.consume_front("bfm")) {

    if (Name.consume_back(".v4f32.v16i8")) {

          F->arg_begin()->getType());
  if (Name.consume_front("vst")) {

    static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");

    static const Intrinsic::ID StoreInts[] = {
        Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
        Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

    static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane};

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};

    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), StoreInts[fArgs.size() - 3], Tys);

    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
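    // The argument count alone identifies the vst variant: a plain vstN call
    // carries the pointer, N data registers, and an alignment argument, so
    // fArgs.size() - 3 indexes StoreInts; the lane forms carry an extra lane
    // index, hence fArgs.size() - 5 for StoreLaneInts.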
  if (Name.consume_front("mve.")) {

    if (Name == "vctp64") {

    if (Name.starts_with("vrintn.v")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());

    if (Name.consume_back(".v4i1")) {

      if (Name.consume_back(".predicated.v2i64.v4i32"))

        return Name == "mull.int" || Name == "vqdmull";

      if (Name.consume_back(".v2i64")) {

        bool IsGather = Name.consume_front("vldr.gather.");
        if (IsGather || Name.consume_front("vstr.scatter.")) {
          if (Name.consume_front("base.")) {

            Name.consume_front("wb.");

            return Name == "predicated.v2i64";

          if (Name.consume_front("offset.predicated."))
            return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                   Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

  if (Name.consume_front("cde.vcx")) {

    if (Name.consume_back(".predicated.v2i64.v4i1"))

      return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
             Name == "3q" || Name == "3qa";

          F->arg_begin()->getType());
  if (Name.starts_with("addp")) {

    if (F->arg_size() != 2)

    if (Ty && Ty->getElementType()->isFloatingPointTy()) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);

  if (Name.starts_with("bfcvt")) {
  if (Name.consume_front("sve.")) {

    if (Name.consume_front("bf")) {
      if (Name.consume_back(".lane")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)

    if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {

    if (Name.consume_front("addqv")) {

      if (!F->getReturnType()->isFPOrFPVectorTy())

      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);

    if (Name.consume_front("ld")) {

      static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
      if (LdRegex.match(Name)) {

        static const Intrinsic::ID LoadIDs[] = {
            Intrinsic::aarch64_sve_ld2_sret,
            Intrinsic::aarch64_sve_ld3_sret,
            Intrinsic::aarch64_sve_ld4_sret,
        };
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  LoadIDs[Name[0] - '2'], Ty);
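        // LdRegex accepts "2", "3" or "4", optionally followed by a type
        // suffix such as ".nxv4i32", so the leading digit picks the
        // structured load directly: LoadIDs[Name[0] - '2'] maps
        // '2' -> ld2_sret, '3' -> ld3_sret, '4' -> ld4_sret.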
    if (Name.consume_front("tuple.")) {

      if (Name.starts_with("get")) {

        Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_extract, Tys);

      if (Name.starts_with("set")) {

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[0], Args[2], Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);

      static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
      if (CreateTupleRegex.match(Name)) {

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_insert, Tys);
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("im2col.3d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
             .Case("im2col.4d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
             .Case("im2col.5d",
                   Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
             .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
             .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
             .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
             .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
             .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==

    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);

  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==

      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("global.to.shared.cluster",
                   Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
             .Case("shared.cta.to.cluster",
                   Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)

  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
                                  bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {
  if (Name.consume_front("amdgcn.")) {
    if (Name == "alignbit") {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::fshr, {F->getReturnType()});

    if (Name.consume_front("atomic.")) {
      if (Name.starts_with("inc") || Name.starts_with("dec")) {

    if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
        Name.consume_front("flat.atomic.")) {
      if (Name.starts_with("fadd") ||
          (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
          (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {

    if (Name.starts_with("ldexp.")) {
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ldexp,
          {F->getReturnType(), F->getArg(1)->getType()});
  if (F->arg_size() == 1) {

        F->arg_begin()->getType());

  if (F->arg_size() == 2 && Name == "coro.end") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::coro_end);

  if (Name.consume_front("dbg.")) {

    if (CanUpgradeDebugIntrinsicsToRecords) {
      if (Name == "addr" || Name == "value" || Name == "assign" ||
          Name == "declare" || Name == "label") {

    if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::dbg_value);
  if (Name.consume_front("experimental.vector.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("extract.", Intrinsic::vector_extract)
             .StartsWith("insert.", Intrinsic::vector_insert)
             .StartsWith("splice.", Intrinsic::vector_splice)
             .StartsWith("reverse.", Intrinsic::vector_reverse)
             .StartsWith("interleave2.", Intrinsic::vector_interleave2)
             .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)

    const auto *FT = F->getFunctionType();

    if (ID == Intrinsic::vector_extract ||
        ID == Intrinsic::vector_interleave2)

    if (ID != Intrinsic::vector_interleave2)

    if (ID == Intrinsic::vector_insert)
  if (Name.consume_front("reduce.")) {

    SmallVector<StringRef, 2> Groups;
    static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups))
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)

    static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");

      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("fadd", Intrinsic::vector_reduce_fadd)
               .Case("fmul", Intrinsic::vector_reduce_fmul)

    auto Args = F->getFunctionType()->params();

        {Args[V2 ? 1 : 0]});
  if (Name.consume_front("experimental.stepvector.")) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), ID, F->getFunctionType()->getReturnType());

  if (Name.starts_with("flt.rounds")) {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::get_rounding);

  if (Name.starts_with("invariant.group.barrier")) {

    auto Args = F->getFunctionType()->params();
    Type *ObjectPtr[1] = {Args[0]};
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);

  if ((Name.starts_with("lifetime.start") ||
       Name.starts_with("lifetime.end")) &&
      F->arg_size() == 2) {

        ? Intrinsic::lifetime_start
        : Intrinsic::lifetime_end;

        F->getArg(0)->getType());
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("memcpy.", Intrinsic::memcpy)
             .StartsWith("memmove.", Intrinsic::memmove)

    if (F->arg_size() == 5) {

          F->getFunctionType()->params().slice(0, 3);

  if (Name.starts_with("memset.") && F->arg_size() == 5) {

    const auto *FT = F->getFunctionType();
    Type *ParamTypes[2] = {
        FT->getParamType(0),

    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::memset, ParamTypes);
  if (Name.consume_front("nvvm.")) {

    if (F->arg_size() == 1) {
      IID = StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)

          {F->getReturnType()});
    if (!F->getReturnType()->getScalarType()->isBFloatTy()) {

    bool Expand = false;
    if (Name.consume_front("abs."))
      Expand =
          Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
    else if (Name.consume_front("fabs."))
      Expand = Name == "f" || Name == "ftz.f" || Name == "d";
    else if (Name.consume_front("max.") || Name.consume_front("min."))
      Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
               Name == "ui" || Name == "ull";
    else if (Name.consume_front("atomic.load."))

    else if (Name.consume_front("bitcast."))
      Expand =
          Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
    else if (Name.consume_front("rotate."))
      Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
    else if (Name.consume_front("ptr.gen.to."))

    else if (Name.consume_front("ptr."))

    else if (Name.consume_front("ldg.global."))
      Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                Name.starts_with("p."));
    else
      Expand = StringSwitch<bool>(Name)
                   .Case("barrier0", true)
                   .Case("barrier.n", true)
                   .Case("barrier.sync.cnt", true)
                   .Case("barrier.sync", true)
                   .Case("barrier", true)
                   .Case("bar.sync", true)
                   .Case("clz.ll", true)
                   .Case("popc.ll", true)
                   .Case("swap.lo.hi.b64", true)
                   .Case("tanh.approx.f32", true)
  if (Name.starts_with("objectsize.")) {
    Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->arg_size() == 2 || F->arg_size() == 3) {
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::objectsize, Tys);

  if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::ptr_annotation,
        {F->arg_begin()->getType(), F->getArg(1)->getType()});
  if (Name.consume_front("riscv.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
             .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
             .Case("aes32esi", Intrinsic::riscv_aes32esi)
             .Case("aes32esmi", Intrinsic::riscv_aes32esmi)

    if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {

    if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
        F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
             .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
             .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
             .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)

    if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
  if (Name == "stackprotectorcheck") {

  if (Name == "thread.pointer") {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

  if (Name == "var.annotation" && F->arg_size() == 4) {
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::var_annotation,
        {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
  if (Name.consume_front("wasm.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
             .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
             .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)

          F->getReturnType());

    if (Name.consume_front("dot.i8x16.i7x16.")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)

                     Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&

  auto *FT = F->getFunctionType();

  std::string Name = F->getName().str();

                    Name, F->getParent());

  if (Result != std::nullopt) {
                                  bool CanUpgradeDebugIntrinsicsToRecords) {

       GV->getName() == "llvm.global_dtors")) ||

  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {

        Ctor->getAggregateElement(1),
  unsigned NumElts = ResultTy->getNumElements() * 8;

  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
      if (Idx < NumElts)
        Idx -= NumElts - 16; // end of lane, switch operand
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");
  unsigned NumElts = ResultTy->getNumElements() * 8;

  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
      if (Idx >= 16)
        Idx += NumElts - 16; // end of lane, switch operand
      Idxs[l + i] = Idx + l;
    }

  Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  for (unsigned i = 0; i != NumElts; ++i)

  Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),

  if (C->isAllOnesValue())

  return Builder.CreateSelect(Mask, Op0, Op1);

  if (C->isAllOnesValue())

                                   Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
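  // The scalar k-mask arrives as an integer and is bitcast to a vector of i1
  // before the select; when the mask is a constant all-ones value the select
  // is skipped entirely. Roughly, for an 8-lane operation:
  //   %m = bitcast i8 %k to <8 x i1>
  //   %r = select <8 x i1> %m, <8 x i32> %op0, <8 x i32> %op1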
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

  ShiftVal &= (NumElts - 1);

  if (ShiftVal > 16) {

  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;
      Indices[l + i] = Idx + l;

      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
                                 bool ZeroMask, bool IndexForm) {

  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;

  Value *V = Builder.CreateIntrinsic(IID, Args);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

                           bool IsRotateRight) {

  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
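  // A rotate is a funnel shift whose two inputs are the same value, so the
  // legacy rotate intrinsics lower to llvm.fshl/llvm.fshr; roughly:
  //   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
  //                                        <4 x i32> %amt)
  // A scalar rotate amount is splatted first so all operand types match.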
  Value *Ext = Builder.CreateSExt(Cmp, Ty);

                            bool IsShiftRight, bool ZeroMask) {

  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
  const Align Alignment =

          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)

  if (C->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

  const Align Alignment =

  if (C->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
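  // A constant all-ones mask degrades the masked access to a plain aligned
  // load/store; otherwise the generic masked intrinsics are emitted, roughly:
  //   call void @llvm.masked.store.v16f32.p0(<16 x float> %data, ptr %p,
  //                                          i32 64, <16 x i1> %mask)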
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});

  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  LHS = Builder.CreateShl(LHS, ShiftAmt);
  LHS = Builder.CreateAShr(LHS, ShiftAmt);
  RHS = Builder.CreateShl(RHS, ShiftAmt);
  RHS = Builder.CreateAShr(RHS, ShiftAmt);

  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  LHS = Builder.CreateAnd(LHS, Mask);
  RHS = Builder.CreateAnd(RHS, Mask);
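  // PMULDQ/PMULUDQ multiply the low 32 bits of each 64-bit element. The
  // signed form materializes the sign extension as shl-by-32 then ashr-by-32,
  // the unsigned form masks with 0xffffffff; the following 64-bit multiply
  // then yields the full product, roughly per element:
  //   ((x << 32) >> 32) * ((y << 32) >> 32)   // arithmetic >>, signed case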
  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

  for (unsigned i = 0; i != NumElts; ++i)

  for (unsigned i = NumElts; i != 8; ++i)
    Indices[i] = NumElts + i % NumElts;
  Vec = Builder.CreateShuffleVector(Vec,

  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
                          unsigned CC, bool Signed) {

  } else if (CC == 7) {

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);

  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);

  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  Name = Name.substr(12);

  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {

    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;

  Rep = Builder.CreateIntrinsic(IID, Args);
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {

    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(

    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    Type *Ty = (Name == "abs.bf16")

    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {

  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {

  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {

    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {

    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {

    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},

    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {

    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,

  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||

  } else if (Name == "rotate.b32") {

    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {

    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
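    // Swapping the 32-bit halves of an i64 is a rotate by 32, hence the
    // funnel shift with a constant amount; roughly:
    //   %r = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 32)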
  } else if ((Name.consume_front("ptr.gen.to.") &&

              Name.starts_with(".to.gen"))) {

  } else if (Name.consume_front("ldg.global")) {

    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));

    LD->setMetadata(LLVMContext::MD_invariant_load, MD);

  } else if (Name == "tanh.approx.f32") {

    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),

  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {

        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,

  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},

  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},

  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},

      !F->getReturnType()->getScalarType()->isBFloatTy()) {

        ? Builder.CreateBitCast(Arg, NewType)

    Rep = Builder.CreateCall(NewFn, Args);
    if (F->getReturnType()->isIntegerTy())
      Rep = Builder.CreateBitCast(Rep, F->getReturnType());
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {

        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {

    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {

    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {

    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
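    // The unaligned-store intrinsics become ordinary stores with align 1,
    // while the nontemporal movnt/storent forms above keep their streaming
    // hint via !nontemporal metadata; roughly:
    //   store <8 x float> %v, ptr %p, align 1
    //   store <8 x float> %v, ptr %p, align 32, !nontemporal !0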
  } else if (Name == "avx512.mask.store.ss") {

  } else if (Name.starts_with("avx512.mask.store")) {

    bool Aligned = Name[17] != 'u';

  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {

    bool CmpEq = Name[9] == 'e';

    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx512.broadcastm")) {

    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {

    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  } else if (Name.starts_with("avx.sqrt.p") ||
             Name.starts_with("sse2.sqrt.p") ||
             Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {

      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;

      Rep = Builder.CreateIntrinsic(IID, Args);

      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});

  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {

    Rep = Builder.CreateAnd(Op0, Op1);

    Rep = Builder.CreateICmp(Pred, Rep, Zero);

  } else if (Name.starts_with("avx512.mask.pbroadcast")) {

    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));

  } else if (Name.starts_with("avx512.kunpck")) {

    for (unsigned i = 0; i != NumElts; ++i)

    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {

    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {

    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {

    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {

    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {

    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {

    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {

    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());

    if (Name[14] == 'c')

    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
             Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
             Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
             Name == "sse.div.ss" || Name == "sse2.div.sd") {

        ConstantInt::get(I32Ty, 0));

        ConstantInt::get(I32Ty, 0));

    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
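    // The scalar SSE arithmetic intrinsics operate on element 0 only and pass
    // the remaining lanes of the first operand through unchanged, so the
    // upgrade extracts both element-0 values, applies the IEEE operation, and
    // reinserts the result; roughly, for sse.add.ss:
    //   %a0 = extractelement <4 x float> %a, i32 0
    //   %b0 = extractelement <4 x float> %b, i32 0
    //   %s  = fadd float %a0, %b0
    //   %r  = insertelement <4 x float> %a, float %s, i32 0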
2824 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
2826 bool CmpEq = Name[16] ==
'e';
2828 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
2836 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2839 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2842 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2849 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
2854 if (VecWidth == 128 && EltWidth == 32)
2855 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2856 else if (VecWidth == 256 && EltWidth == 32)
2857 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2858 else if (VecWidth == 512 && EltWidth == 32)
2859 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2860 else if (VecWidth == 128 && EltWidth == 64)
2861 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2862 else if (VecWidth == 256 && EltWidth == 64)
2863 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2864 else if (VecWidth == 512 && EltWidth == 64)
2865 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2872 }
else if (Name.starts_with(
"avx512.cmp.p")) {
2874 Type *OpTy = Args[0]->getType();
2878 if (VecWidth == 128 && EltWidth == 32)
2879 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2880 else if (VecWidth == 256 && EltWidth == 32)
2881 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2882 else if (VecWidth == 512 && EltWidth == 32)
2883 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2884 else if (VecWidth == 128 && EltWidth == 64)
2885 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2886 else if (VecWidth == 256 && EltWidth == 64)
2887 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2888 else if (VecWidth == 512 && EltWidth == 64)
2889 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2894 if (VecWidth == 512)
2896 Args.push_back(Mask);
2898 Rep = Builder.CreateIntrinsic(IID, Args);
2899 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
2903 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
2906 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
2907 Name.starts_with(
"avx512.cvtw2mask.") ||
2908 Name.starts_with(
"avx512.cvtd2mask.") ||
2909 Name.starts_with(
"avx512.cvtq2mask.")) {
2914 }
  else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
           Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
           Name.starts_with("avx512.mask.pabs")) {
    Rep = upgradeAbs(Builder, *CI);
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    Rep = Builder.CreateSIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "avx512.cvtusi2sd") {
    Rep = Builder.CreateUIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    Rep = Builder.CreateFPExt(
        Rep, cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  }
  else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
           Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
           Name.starts_with("avx512.mask.cvtdq2pd.") ||
           Name.starts_with("avx512.mask.cvtudq2pd.") ||
           Name.starts_with("avx512.mask.cvtdq2ps.") ||
           Name.starts_with("avx512.mask.cvtudq2ps.") ||
           Name.starts_with("avx512.mask.cvtqq2pd.") ||
           Name.starts_with("avx512.mask.cvtuqq2pd.") ||
           Name == "avx512.mask.cvtqq2ps.256" ||
           Name == "avx512.mask.cvtqq2ps.512" ||
           Name == "avx512.mask.cvtuqq2ps.256" ||
           Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
           Name == "avx.cvt.ps2.pd.256" ||
           Name == "avx512.mask.cvtps2pd.128" ||
           Name == "avx512.mask.cvtps2pd.256") {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());

    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (CI->arg_size() == 4 &&
             (!isa<ConstantInt>(CI->getArgOperand(3)) ||
              cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    {Rep, CI->getArgOperand(3)});
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }

    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  }
  else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
           Name.starts_with("vcvtph2ps.")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  }
  else if (Name.starts_with("avx512.mask.load")) {
    // "avx512.mask.loadu." or "avx512.mask.load."
    bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
    Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0),
                            CI->getArgOperand(1), CI->getArgOperand(2),
                            Aligned);
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_expandload, ResultTy,
        {CI->getOperand(0), MaskVec, CI->getOperand(1)});
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
    Value *MaskVec =
        getX86MaskVec(Builder, CI->getArgOperand(2),
                      cast<FixedVectorType>(ResultTy)->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_compressstore, ResultTy,
        {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());

    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(
        IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
  }
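  // XOP vpcom* intrinsics come in two shapes: an immediate form with three
  // operands, or a named-condition form (lt/le/gt/ge/eq/ne/false/true)
  // encoded directly in the intrinsic name after the element-type suffix.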
  else if (Name.starts_with("xop.vpcom")) {
    bool IsSigned;
    if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
        Name.ends_with("uq"))
      IsSigned = false;
    else if (Name.ends_with("b") || Name.ends_with("w") ||
             Name.ends_with("d") || Name.ends_with("q"))
      IsSigned = true;
    else
      llvm_unreachable("Unknown suffix");

    unsigned Imm;
    if (CI->arg_size() == 3) {
      Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    } else {
      Name = Name.substr(9); // strip off "xop.vpcom"
      if (Name.starts_with("lt"))
        Imm = 0;
      else if (Name.starts_with("le"))
        Imm = 1;
      else if (Name.starts_with("gt"))
        Imm = 2;
      else if (Name.starts_with("ge"))
        Imm = 3;
      else if (Name.starts_with("eq"))
        Imm = 4;
      else if (Name.starts_with("ne"))
        Imm = 5;
      else if (Name.starts_with("false"))
        Imm = 6;
      else if (Name.starts_with("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");
    }

    Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
  }
  else if (Name.starts_with("xop.vpcmov")) {
    Value *Sel = CI->getArgOperand(2);
    Value *NotSel = Builder.CreateNot(Sel);
    Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    Rep = Builder.CreateOr(Sel0, Sel1);
  } else if (Name.starts_with("xop.vprot") ||
             Name.starts_with("avx512.prol") ||
             Name.starts_with("avx512.mask.prol")) {
    Rep = upgradeX86Rotate(Builder, *CI, false);
  } else if (Name.starts_with("avx512.pror") ||
             Name.starts_with("avx512.mask.pror")) {
    Rep = upgradeX86Rotate(Builder, *CI, true);
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
  }
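  // crc32.64.8 took an i64 accumulator, but only the low 32 bits matter:
  // truncate, call the 32-bit intrinsic, and zero-extend the result back.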
  else if (Name == "sse42.crc32.64.8") {
    Value *Trunc0 =
        Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
                                  {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx.vbroadcast.s") ||
             Name.starts_with("avx512.vbroadcast.s")) {
    // Replace broadcasts with a series of insertelements.
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = PoisonValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
  }
  else if (Name.starts_with("sse41.pmovsx") ||
           Name.starts_with("sse41.pmovzx") ||
           Name.starts_with("avx2.pmovsx") ||
           Name.starts_with("avx2.pmovzx") ||
           Name.starts_with("avx512.mask.pmovsx") ||
           Name.starts_with("avx512.mask.pmovzx")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;

    Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

    bool DoSext = Name.contains("pmovsx");
    Rep =
        DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
    // If there are 3 arguments, it's a masked intrinsic so we need a select.
    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    Type *Ty = CI->getArgOperand(1)->getType();
    Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  }
  else if (Name.starts_with("avx.vbroadcastf128") ||
           Name == "avx2.vbroadcasti128") {
    // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
    Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
    unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
    Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (Name.starts_with("avx512.mask.shuf.i") ||
             Name.starts_with("avx512.mask.shuf.f")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Type *VT = CI->getType();
    unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);

    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(1), ShuffleMask);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  }
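  // Full-width broadcasts (broadcastf32x4 and friends, plus the scalar
  // pbroadcast/vbroadcast forms) lower to a shufflevector that repeats the
  // source lanes, optionally followed by a masked select.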
  else if (Name.starts_with("avx512.mask.broadcastf") ||
           Name.starts_with("avx512.mask.broadcasti")) {
    unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
                              ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;

    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(0), ShuffleMask);
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
    SmallVector<int, 8> M;
    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
    Rep = Builder.CreateShuffleVector(Op, M);

    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  }
  else if (Name.starts_with("sse2.padds.") ||
           Name.starts_with("avx2.padds.") ||
           Name.starts_with("avx512.padds.") ||
           Name.starts_with("avx512.mask.padds.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                    CI->getArgOperand(1), CI->getArgOperand(2),
                                    CI->getArgOperand(3), CI->getArgOperand(4),
                                    false);
  } else if (Name.starts_with("avx512.mask.valign.")) {
    Rep = upgradeX86ALIGNIntrinsics(
        Builder, CI->getArgOperand(0), CI->getArgOperand(1),
        CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
  }
  else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    // 128/256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift / 8);
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    // 128/256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift / 8);
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    // 128/256/512-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    // 128/256/512-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  }
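  // vinsertf128/vextractf128 and the AVX-512 subvector insert/extract
  // intrinsics are pure lane rearrangements, so they also lower to
  // shufflevector instructions.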
  else if (Name.starts_with("avx.vinsertf128.") ||
           Name == "avx2.vinserti128" ||
           Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);

    // Insert the widened second operand into the first operand: indices for
    // the inserted lanes select from the second source.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 5)
      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Get indexes for the subvector of the input vector.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.perm.df.") ||
           Name.starts_with("avx512.mask.perm.di.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx.vperm2f128.") ||
             Name == "avx2.vperm2i128") {
    // The immediate permute control byte looks like this:
    //    [1:0] - select 128 bits from sources for low half of destination
    //    [2]   - ignore
    //    [3]   - zero low half of destination
    //    [5:4] - select 128 bits from sources for high half of destination
    //    [6]   - ignore
    //    [7]   - zero high half of destination

    uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // Determine which operand(s) are actually in use for this instruction.
    Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

    // If needed, replace operands based on zero mask.
    V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
    V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

    // Permute low half of result.
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // Permute high half of result.
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
             Name.starts_with("avx512.mask.vpermil.p") ||
             Name.starts_with("avx512.mask.pshuf.d.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);

    SmallVector<int, 8> Idxs(NumElts);
    // Lookup the bits for this element, wrapping around the immediate every
    // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
    // to offset by the first index of each group.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  }
  else if (Name == "sse2.pshufl.w" ||
           Name.starts_with("avx512.mask.pshufl.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VT = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VT->getNumElements();

    unsigned NumLaneElts = 128 / VT->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane, switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element, adding HalfLaneElts bits from the
      // immediate and wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  }
  else if (Name.starts_with("avx512.mask.movddup") ||
           Name.starts_with("avx512.mask.movshdup") ||
           Name.starts_with("avx512.mask.movsldup")) {
    Value *Op0 = CI->getArgOperand(0);
    auto *VT = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VT->getNumElements();
    unsigned NumLaneElts = 128 / VT->getScalarSizeInBits();

    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.and.") ||
           Name.starts_with("avx512.mask.pand.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {CI->getArgOperand(0),
                                          CI->getArgOperand(1),
                                          CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {CI->getArgOperand(0),
                                          CI->getArgOperand(1),
                                          CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {CI->getArgOperand(0),
                                          CI->getArgOperand(1),
                                          CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {CI->getArgOperand(0),
                                          CI->getArgOperand(1),
                                          CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if ((Name.starts_with("avx512.mask.max.p") ||
              Name.starts_with("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(IID, {CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        CI->getArgOperand(4)});
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  }
  else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
    Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  }
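  // The shift intrinsic names encode both the element size and the
  // immediate/variable form, e.g. "avx512.mask.psll.d.128" vs
  // "avx512.mask.psllv4.si"; the character probing below decodes them.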
  else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : 0;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), CI->getArgOperand(0),
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  }
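  // FMA names encode negation of the multiply and/or the accumulator
  // ("vfmadd", "vfmsub", "vfnmadd", "vfnmsub") plus a scalar/packed suffix;
  // the character tests below decode those variants.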
  else if (Name.starts_with("fma.vfmadd.") ||
           Name.starts_with("fma.vfmsub.") ||
           Name.starts_with("fma.vfnmadd.") ||
           Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  }
  else if (Name.starts_with("avx512.mask.vfmadd.s") ||
           Name.starts_with("avx512.maskz.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmsub.s") ||
           Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};

      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
                      : IsMask3 ? C
                                : A;

    // For Mask3 with NegAcc, the passthru must avoid the negation above.
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);

    Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  }
  else if (Name.starts_with("avx512.mask.vfmadd.p") ||
           Name.starts_with("avx512.mask.vfnmadd.p") ||
           Name.starts_with("avx512.mask.vfnmsub.p") ||
           Name.starts_with("avx512.mask3.vfmadd.p") ||
           Name.starts_with("avx512.mask3.vfmsub.p") ||
           Name.starts_with("avx512.mask3.vfnmsub.p") ||
           Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (CI->arg_size() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the name.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, Ops);
  }
  else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmaddsub.p") ||
           Name.starts_with("avx512.maskz.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->arg_size() == 5) {
      Intrinsic::ID IID;
      // Check the character before ".512" in the name.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2)};

      Function *FMA = Intrinsic::getOrInsertDeclaration(
          CI->getModule(), Intrinsic::fma, Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("avx512.mask.pternlog.") ||
           Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
  }
  else if (Name.starts_with("avx512.mask.vpmadd52") ||
           Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("avx512.mask.vpermi2var.") ||
           Name.starts_with("avx512.mask.vpermt2var.") ||
           Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  }
  else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
           Name.starts_with("avx512.maskz.vpdpbusd.") ||
           Name.starts_with("avx512.mask.vpdpbusds.") ||
           Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    SmallVector<Value *, 4> Args{CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(2)};

    // The second and third operands were changed from vectors of i32 to
    // vectors of i8, so bitcast if needed.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 16);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 32);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 64);
      else
        llvm_unreachable("Unexpected intrinsic");

      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
           Name.starts_with("avx512.maskz.vpdpwssd.") ||
           Name.starts_with("avx512.mask.vpdpwssds.") ||
           Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  }
  else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
           Name == "addcarry.u32" || Name == "addcarry.u64" ||
           Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateIntrinsic(IID, Args);

    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);

    CI->replaceAllUsesWith(CF);
    Rep = nullptr;
  }
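  // Any remaining "avx512.mask.*" intrinsic falls back to a generic
  // unmasked-op-plus-select expansion.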
  else if (Name.starts_with("avx512.mask.") &&
           upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep is updated by the call to upgradeAVX512MaskToSelect.
  }

  return Rep;
}
  // AArch64 upgrades: NEON bfloat conversion intrinsics become fptrunc plus
  // shuffles, and the SVE bf16 conversions get corrected predicate types.
  if (Name.starts_with("neon.bfcvt")) {
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 32> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      Value *Trunc =
          Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Type *V4BF16 =
          FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
      dbgs() << "Trunc: " << *Trunc << "\n";
      return Builder.CreateShuffleVector(
          Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
    } else {
      return Builder.CreateFPTrunc(CI->getOperand(0),
                                   Type::getBFloatTy(F->getContext()));
    }
  } else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(Intrinsic::not_intrinsic);
    if (NewID == Intrinsic::not_intrinsic)
      llvm_unreachable("Unhandled Intrinsic!");

    SmallVector<Value *, 3> Args(CI->args());

    // The original intrinsics incorrectly used a predicate based on the
    // smallest element in the vector rather than the largest.
    Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
    Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);

    if (Args[1]->getType() != BadPredTy)
      llvm_unreachable("Unexpected predicate type!");

    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
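  // ARM MVE upgrades: the v4i1 predicate type used by the old 64-bit-element
  // intrinsics is rewritten to v2i1, with arm.mve.pred.v2i / pred.i2v
  // round-trips inserted for any 1-bit vector operands.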
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP =
        Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                CI->getArgOperand(0),
                                /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  // NumOperands, RMWOp and RetTy are derived from the legacy AMDGPU atomic
  // intrinsic in the code above.
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = cast<PointerType>(Ptr->getType());
  Value *Val = CI->getArgOperand(1);

  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
  bool IsVolatile = false;

  // These should have 5 arguments. A separate version of the ds_fadd
  // intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    Order = static_cast<AtomicOrdering>(
        cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue());
  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // The bf16 variant was represented as a vector of i16; bitcast to bfloat.
  if (auto *VT = dyn_cast<VectorType>(Val->getType()))
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Builder.getBFloatTy(), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }

  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  return Builder.CreateBitCast(RMW, RetTy);
}
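// Debug intrinsic upgrades: llvm.dbg.* calls are converted in place to the
// equivalent non-instruction DbgRecords.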
/// Helper to unwrap intrinsic call MetadataAsValue operands.
static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
  if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
    return dyn_cast<MDNode>(MAV->getMetadata());
  return nullptr;
}

static MDNode *getDebugLocSafe(const Instruction *I) {
  return I->getDebugLoc().getAsMDNode();
}

/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
                                                        CI->getDebugLoc());
  } else if (Name == "assign") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Assign, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVOp(CI, 4), unwrapMAVOp(CI, 5), CI->getDebugLoc());
  } else if (Name == "declare") {
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Declare, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        CI->getDebugLoc());
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with an expression.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        CI->getDebugLoc());
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
      // Nonzero-offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isZeroValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        DbgVariableRecord::LocationType::Value, unwrapMAVOp(CI, 0),
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, CI->getDebugLoc());
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
  // Calls with no direct replacement function are rewritten or deleted here.
  if (!NewFn) {
    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsAArch64 = Name.consume_front("aarch64.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsNVVM) {
      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsAArch64) {
      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      upgradeDbgIntrinsicToDbgRecord(Name, CI);
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      SmallVector<Value *> Args(CI->args());
      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
      NewCI->setAttributes(CI->getAttributes());
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    llvm_unreachable("Unknown function for CallBase upgrade.");
  };

  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The lane argument of these intrinsics was changed from i64 to i32.
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(C),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }
  case Intrinsic::coro_end: {
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy =
        FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
         Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade dbg.addr to dbg.value with an appropriate expression.
    if (Name.starts_with("dbg.addr")) {
      NewCall = Builder.CreateCall(
          NewFn,
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument,
    // dropping nonzero offsets instead of attempting to upgrade them.
    assert(CI->arg_size() == 4);
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                CI->getArgOperand(2), CI->getArgOperand(3),
                Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                CI->getArgOperand(2), CI->getArgOperand(3),
                Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Arg2 = CI->getArgOperand(2);

    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (CI->getType()->isIntegerTy(64))
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_mapa_shared_cluster: {
    // Create a new call with the correct address space.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // Create a new call with the correct address space.
    SmallVector<Value *, 4> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());

    // Update the address space of the first argument.
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    // Attach the flag argument required by the new signature.
    Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The type overload of these intrinsics was removed; they now operate on
    // i32 only, so truncate and sign-extend around the call as needed.
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed to
    // v2i64. This is purely a nop, since those are bitwise intrinsics, so
    // all that is required is a bitcast for both arguments.
    // First, check that the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic; add bitcasts.
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument
    // models an inherently 8-bit immediate operand to these instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // Only the old five-operand signatures (with a separate alignment
    // argument) are rewritten here.
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }

  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    if (CI->arg_size() != 2) {
      DefaultCase();
      return;
    }
    Value *Ptr = CI->getArgOperand(1);
    Ptr = Ptr->stripPointerCasts();
    if (isa<AllocaInst>(Ptr)) {
      if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
        NewCall = Builder.CreateLifetimeStart(Ptr);
      else
        NewCall = Builder.CreateLifetimeEnd(Ptr);
      break;
    }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512_vpdpbusd_128:
  case Intrinsic::x86_avx512_vpdpbusd_256:
  case Intrinsic::x86_avx512_vpdpbusd_512:
  case Intrinsic::x86_avx512_vpdpbusds_128:
  case Intrinsic::x86_avx512_vpdpbusds_256:
  case Intrinsic::x86_avx512_vpdpbusds_512: {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();

    SmallVector<Value *, 4> Args(CI->args());
    Type *NewArgType = FixedVectorType::get(Builder.getInt8Ty(), VecWidth / 8);
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement
  // function if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallBase *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);

    // Remove the old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
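// Metadata upgrades: scalar TBAA tags are rewritten into the struct-path
// aware format <Type, Type, Offset 0>.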
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
  if (NumOperands == 0)
    return &MD; // Invalid, punt to the verifier.

  // Check if the tag already uses the struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (NumOperands == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD,
                      ConstantAsMetadata::get(
                          Constant::getNullValue(Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number; if it is out-dated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  // Look up the "Debug Info Version" module flag directly.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    // Diagnose malformed debug info.
    DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
    M.getContext().diagnose(Diag);
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
                                    GlobalValue *GV, const Metadata *V) {
  Function *F = cast<Function>(GV);

  constexpr StringLiteral DefaultValue = "1";
  StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
  unsigned Length = 0;

  if (F->hasFnAttribute(Attr)) {
    // The existing attribute has the form "x[,y[,z]]".
    StringRef S = F->getFnAttribute(Attr).getValueAsString();
    for (; Length < 3 && !S.empty(); Length++) {
      auto [Part, Rest] = S.split(',');
      Vect3[Length] = Part.trim();
      S = Rest;
    }
  }

  const unsigned Dim = DimC - 'x';
  assert(Dim < 3 && "Unexpected dim char");

  const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();

  Vect3[Dim] = llvm::utostr(VInt);
  Length = std::max(Length, Dim + 1);

  const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
  F->addFnAttr(Attr, NewAttr);
}

static bool isXYZ(StringRef S) {
  return S == "x" || S == "y" || S == "z";
}

static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    if (!mdconst::extract<ConstantInt>(V)->isZero())
      cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values: the alignment in the low
    // 16 bits and the argument index in the high bits.
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithAlignment(GV->getContext(), StackAlign));
    return true;
  }
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
    return true;
  }
  if (K.consume_front("maxntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("reqntid") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
    return true;
  }
  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
    upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
    return true;
  }
  if (K == "grid_constant") {
    // ...
    return true;
  }

  return false;
}

void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  SmallSetVector<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(MD).second)
      continue;

    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
    // Each nvvm.annotations entry has the form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // Start at index 1 to skip the global value, stepping over each
    // key/value pair.
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(MD->getOperand(j));
      const MDOperand &V = MD->getOperand(j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
      if (!Upgraded)
        NewOperands.append({K, V});
    }

    if (NewOperands.size() > 1)
      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
  }
  // ...
}
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
static bool upgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          // Rewrite "a#b" as "a;b" to match the modern marker format.
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}

void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);
    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast the argument to the parameter type of the new function if
        // it's not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade the
  // marker, the module either already contains the new intrinsics or is not
  // ARC, and there is no need to upgrade the runtime calls.
  if (!upgradeRetainReleaseMarker(M))
    return;
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  unsigned SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC level module flags with an Error behavior to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error) {
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade PIE level module flags with an Error behavior to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
        if (Behavior->getLimitedValue() == Module::Error) {
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
    // Upgrade branch protection and return address signing module flags:
    // the Error behavior was relaxed to Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section: remove whitespace from the
    // section name so llvm-lto does not complain about mismatching module
    // flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // The IR upgrader turns the i32-typed "Objective-C Garbage Collection"
    // flag into an i8 value, extracting any embedded Swift version first.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }

    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(0),
          MDString::get(M.getContext(), "amdhsa_code_object_version"),
          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" was added for Objective-C after some
  // bitcode was already produced. Upgrade ObjC bitcode to contain the flag
  // with value 0, so this flag can be downgraded correctly when linking an
  // ObjC bitcode without it against an ObjC bitcode with it.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version", SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.starts_with("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, a callsite may only carry strictfp if the enclosing function
// does too; stray callsite attributes are rewritten to nobuiltin.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute but
    // this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};

/// Replace the "amdgpu-unsafe-fp-atomics" function attribute with atomicrmw
/// metadata.
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
      return;

    MDNode *Empty = MDNode::get(RMW.getContext(), {});
    RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
  }
};
} // namespace

void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute, convert
  // any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from the function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    F.removeFnAttr("implicit-section-name");
  }

  if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
      A.isValid()) {
    // Replace the attribute with per-instruction atomicrmw metadata.
    if (A.getValueAsBool()) {
      AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
      Visitor.visit(F);
    }
    F.removeFnAttr("amdgpu-unsafe-fp-atomics");
  }
}
static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().starts_with("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
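// Data layout upgrades are target-specific string edits; each target block
// below only appends or rewrites components that are missing or stale.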
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only upgrade needed for pre-GCN AMDGPU, SPIR and non-Logical SPIR-V
  // is setting the address space of globals to 1.
  if (((T.isAMDGPU() && !T.isAMDGCN()) ||
       (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native integer type on 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();

  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define the address space for globals.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations, and complete partial ones.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Add sizing for address spaces 7, 8 and 9 (buffer fat pointers,
    // buffer resources, buffer strided pointers).
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128:128:48");
    constexpr StringRef OldP8("-p8:128:128-");
    if (DL.contains(OldP8))
      Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add the mixed
    // pointer-size address spaces to it.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  if (T.isAArch64()) {
    // Add "-Fn32" (32-bit function-pointer alignment) if missing.
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64()
      /* || ... */) {
    // Add "-i128:128" right after "-i64:64" if it is missing.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
    return Res;
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte aligned everywhere except on IAMCU.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // Some 32-bit MSVC datalayouts said "-f80:32-"; x86 fp80 is 128-bit
  // aligned under MSVC.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
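The overall contract is string in, upgraded string out. Two illustrative calls (the input strings are assumed examples; the exact output depends on the LLVM version):

  #include "llvm/IR/AutoUpgrade.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    // AMDGCN without a globals address space: "-G1", "-ni:7:8:9", and the
    // p7/p8/p9 buffer address spaces are appended.
    outs() << UpgradeDataLayoutString("e-p:64:64", "amdgcn--amdhsa") << '\n';
    // 32-bit MSVC x86: "-f80:32-" is rewritten to "-f80:128-", among
    // other upgrades.
    outs() << UpgradeDataLayoutString("e-m:x-p:32:32-f80:32-n8:16:32",
                                      "i686-pc-windows-msvc")
           << '\n';
    return 0;
  }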
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored: "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}
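UpgradeAttributes mutates an AttrBuilder, so callers round-trip an attribute set through a builder. A sketch under the current AttributeList API (upgradeFnAttrs is our name; the exact entry points may differ across releases):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/AutoUpgrade.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  static AttributeList upgradeFnAttrs(LLVMContext &Ctx, AttributeList AL) {
    AttrBuilder B(Ctx, AL.getFnAttrs());
    // e.g. "no-frame-pointer-elim"="true" becomes "frame-pointer"="all".
    UpgradeAttributes(B);
    return AL.removeFnAttributes(Ctx).addFnAttributes(Ctx, B);
  }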
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
  // Drop "clang.arc.attachedcall" bundles without the now-required operand.
  erase_if(Bundles, [&](OperandBundleDef &OBD) {
    return OBD.getTag() == "clang.arc.attachedcall" && OBD.inputs().empty();
  });
}
AMDGPU address space definition.
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for the objc retain/release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
static constexpr size_t npos
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is an FP type or a vector of FP types.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ...
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade the comment in a call to inline asm that represents an objc retain/release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.