34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsARM.h"
36#include "llvm/IR/IntrinsicsNVPTX.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
38#include "llvm/IR/IntrinsicsWebAssembly.h"
39#include "llvm/IR/IntrinsicsX86.h"
61 cl::desc(
"Disable autoupgrade of debug info"));
71 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
86 Type *LastArgType =
F->getFunctionType()->getParamType(
87 F->getFunctionType()->getNumParams() - 1);
102 if (
F->getReturnType()->isVectorTy())
115 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
116 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
130 if (
F->getReturnType()->getScalarType()->isBFloatTy())
140 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
154 if (Name.consume_front(
"avx."))
155 return (Name.starts_with(
"blend.p") ||
156 Name ==
"cvt.ps2.pd.256" ||
157 Name ==
"cvtdq2.pd.256" ||
158 Name ==
"cvtdq2.ps.256" ||
159 Name.starts_with(
"movnt.") ||
160 Name.starts_with(
"sqrt.p") ||
161 Name.starts_with(
"storeu.") ||
162 Name.starts_with(
"vbroadcast.s") ||
163 Name.starts_with(
"vbroadcastf128") ||
164 Name.starts_with(
"vextractf128.") ||
165 Name.starts_with(
"vinsertf128.") ||
166 Name.starts_with(
"vperm2f128.") ||
167 Name.starts_with(
"vpermil."));
169 if (Name.consume_front(
"avx2."))
170 return (Name ==
"movntdqa" ||
171 Name.starts_with(
"pabs.") ||
172 Name.starts_with(
"padds.") ||
173 Name.starts_with(
"paddus.") ||
174 Name.starts_with(
"pblendd.") ||
176 Name.starts_with(
"pbroadcast") ||
177 Name.starts_with(
"pcmpeq.") ||
178 Name.starts_with(
"pcmpgt.") ||
179 Name.starts_with(
"pmax") ||
180 Name.starts_with(
"pmin") ||
181 Name.starts_with(
"pmovsx") ||
182 Name.starts_with(
"pmovzx") ||
184 Name ==
"pmulu.dq" ||
185 Name.starts_with(
"psll.dq") ||
186 Name.starts_with(
"psrl.dq") ||
187 Name.starts_with(
"psubs.") ||
188 Name.starts_with(
"psubus.") ||
189 Name.starts_with(
"vbroadcast") ||
190 Name ==
"vbroadcasti128" ||
191 Name ==
"vextracti128" ||
192 Name ==
"vinserti128" ||
193 Name ==
"vperm2i128");
195 if (Name.consume_front(
"avx512.")) {
196 if (Name.consume_front(
"mask."))
198 return (Name.starts_with(
"add.p") ||
199 Name.starts_with(
"and.") ||
200 Name.starts_with(
"andn.") ||
201 Name.starts_with(
"broadcast.s") ||
202 Name.starts_with(
"broadcastf32x4.") ||
203 Name.starts_with(
"broadcastf32x8.") ||
204 Name.starts_with(
"broadcastf64x2.") ||
205 Name.starts_with(
"broadcastf64x4.") ||
206 Name.starts_with(
"broadcasti32x4.") ||
207 Name.starts_with(
"broadcasti32x8.") ||
208 Name.starts_with(
"broadcasti64x2.") ||
209 Name.starts_with(
"broadcasti64x4.") ||
210 Name.starts_with(
"cmp.b") ||
211 Name.starts_with(
"cmp.d") ||
212 Name.starts_with(
"cmp.q") ||
213 Name.starts_with(
"cmp.w") ||
214 Name.starts_with(
"compress.b") ||
215 Name.starts_with(
"compress.d") ||
216 Name.starts_with(
"compress.p") ||
217 Name.starts_with(
"compress.q") ||
218 Name.starts_with(
"compress.store.") ||
219 Name.starts_with(
"compress.w") ||
220 Name.starts_with(
"conflict.") ||
221 Name.starts_with(
"cvtdq2pd.") ||
222 Name.starts_with(
"cvtdq2ps.") ||
223 Name ==
"cvtpd2dq.256" ||
224 Name ==
"cvtpd2ps.256" ||
225 Name ==
"cvtps2pd.128" ||
226 Name ==
"cvtps2pd.256" ||
227 Name.starts_with(
"cvtqq2pd.") ||
228 Name ==
"cvtqq2ps.256" ||
229 Name ==
"cvtqq2ps.512" ||
230 Name ==
"cvttpd2dq.256" ||
231 Name ==
"cvttps2dq.128" ||
232 Name ==
"cvttps2dq.256" ||
233 Name.starts_with(
"cvtudq2pd.") ||
234 Name.starts_with(
"cvtudq2ps.") ||
235 Name.starts_with(
"cvtuqq2pd.") ||
236 Name ==
"cvtuqq2ps.256" ||
237 Name ==
"cvtuqq2ps.512" ||
238 Name.starts_with(
"dbpsadbw.") ||
239 Name.starts_with(
"div.p") ||
240 Name.starts_with(
"expand.b") ||
241 Name.starts_with(
"expand.d") ||
242 Name.starts_with(
"expand.load.") ||
243 Name.starts_with(
"expand.p") ||
244 Name.starts_with(
"expand.q") ||
245 Name.starts_with(
"expand.w") ||
246 Name.starts_with(
"fpclass.p") ||
247 Name.starts_with(
"insert") ||
248 Name.starts_with(
"load.") ||
249 Name.starts_with(
"loadu.") ||
250 Name.starts_with(
"lzcnt.") ||
251 Name.starts_with(
"max.p") ||
252 Name.starts_with(
"min.p") ||
253 Name.starts_with(
"movddup") ||
254 Name.starts_with(
"move.s") ||
255 Name.starts_with(
"movshdup") ||
256 Name.starts_with(
"movsldup") ||
257 Name.starts_with(
"mul.p") ||
258 Name.starts_with(
"or.") ||
259 Name.starts_with(
"pabs.") ||
260 Name.starts_with(
"packssdw.") ||
261 Name.starts_with(
"packsswb.") ||
262 Name.starts_with(
"packusdw.") ||
263 Name.starts_with(
"packuswb.") ||
264 Name.starts_with(
"padd.") ||
265 Name.starts_with(
"padds.") ||
266 Name.starts_with(
"paddus.") ||
267 Name.starts_with(
"palignr.") ||
268 Name.starts_with(
"pand.") ||
269 Name.starts_with(
"pandn.") ||
270 Name.starts_with(
"pavg") ||
271 Name.starts_with(
"pbroadcast") ||
272 Name.starts_with(
"pcmpeq.") ||
273 Name.starts_with(
"pcmpgt.") ||
274 Name.starts_with(
"perm.df.") ||
275 Name.starts_with(
"perm.di.") ||
276 Name.starts_with(
"permvar.") ||
277 Name.starts_with(
"pmaddubs.w.") ||
278 Name.starts_with(
"pmaddw.d.") ||
279 Name.starts_with(
"pmax") ||
280 Name.starts_with(
"pmin") ||
281 Name ==
"pmov.qd.256" ||
282 Name ==
"pmov.qd.512" ||
283 Name ==
"pmov.wb.256" ||
284 Name ==
"pmov.wb.512" ||
285 Name.starts_with(
"pmovsx") ||
286 Name.starts_with(
"pmovzx") ||
287 Name.starts_with(
"pmul.dq.") ||
288 Name.starts_with(
"pmul.hr.sw.") ||
289 Name.starts_with(
"pmulh.w.") ||
290 Name.starts_with(
"pmulhu.w.") ||
291 Name.starts_with(
"pmull.") ||
292 Name.starts_with(
"pmultishift.qb.") ||
293 Name.starts_with(
"pmulu.dq.") ||
294 Name.starts_with(
"por.") ||
295 Name.starts_with(
"prol.") ||
296 Name.starts_with(
"prolv.") ||
297 Name.starts_with(
"pror.") ||
298 Name.starts_with(
"prorv.") ||
299 Name.starts_with(
"pshuf.b.") ||
300 Name.starts_with(
"pshuf.d.") ||
301 Name.starts_with(
"pshufh.w.") ||
302 Name.starts_with(
"pshufl.w.") ||
303 Name.starts_with(
"psll.d") ||
304 Name.starts_with(
"psll.q") ||
305 Name.starts_with(
"psll.w") ||
306 Name.starts_with(
"pslli") ||
307 Name.starts_with(
"psllv") ||
308 Name.starts_with(
"psra.d") ||
309 Name.starts_with(
"psra.q") ||
310 Name.starts_with(
"psra.w") ||
311 Name.starts_with(
"psrai") ||
312 Name.starts_with(
"psrav") ||
313 Name.starts_with(
"psrl.d") ||
314 Name.starts_with(
"psrl.q") ||
315 Name.starts_with(
"psrl.w") ||
316 Name.starts_with(
"psrli") ||
317 Name.starts_with(
"psrlv") ||
318 Name.starts_with(
"psub.") ||
319 Name.starts_with(
"psubs.") ||
320 Name.starts_with(
"psubus.") ||
321 Name.starts_with(
"pternlog.") ||
322 Name.starts_with(
"punpckh") ||
323 Name.starts_with(
"punpckl") ||
324 Name.starts_with(
"pxor.") ||
325 Name.starts_with(
"shuf.f") ||
326 Name.starts_with(
"shuf.i") ||
327 Name.starts_with(
"shuf.p") ||
328 Name.starts_with(
"sqrt.p") ||
329 Name.starts_with(
"store.b.") ||
330 Name.starts_with(
"store.d.") ||
331 Name.starts_with(
"store.p") ||
332 Name.starts_with(
"store.q.") ||
333 Name.starts_with(
"store.w.") ||
334 Name ==
"store.ss" ||
335 Name.starts_with(
"storeu.") ||
336 Name.starts_with(
"sub.p") ||
337 Name.starts_with(
"ucmp.") ||
338 Name.starts_with(
"unpckh.") ||
339 Name.starts_with(
"unpckl.") ||
340 Name.starts_with(
"valign.") ||
341 Name ==
"vcvtph2ps.128" ||
342 Name ==
"vcvtph2ps.256" ||
343 Name.starts_with(
"vextract") ||
344 Name.starts_with(
"vfmadd.") ||
345 Name.starts_with(
"vfmaddsub.") ||
346 Name.starts_with(
"vfnmadd.") ||
347 Name.starts_with(
"vfnmsub.") ||
348 Name.starts_with(
"vpdpbusd.") ||
349 Name.starts_with(
"vpdpbusds.") ||
350 Name.starts_with(
"vpdpwssd.") ||
351 Name.starts_with(
"vpdpwssds.") ||
352 Name.starts_with(
"vpermi2var.") ||
353 Name.starts_with(
"vpermil.p") ||
354 Name.starts_with(
"vpermilvar.") ||
355 Name.starts_with(
"vpermt2var.") ||
356 Name.starts_with(
"vpmadd52") ||
357 Name.starts_with(
"vpshld.") ||
358 Name.starts_with(
"vpshldv.") ||
359 Name.starts_with(
"vpshrd.") ||
360 Name.starts_with(
"vpshrdv.") ||
361 Name.starts_with(
"vpshufbitqmb.") ||
362 Name.starts_with(
"xor."));
364 if (Name.consume_front(
"mask3."))
366 return (Name.starts_with(
"vfmadd.") ||
367 Name.starts_with(
"vfmaddsub.") ||
368 Name.starts_with(
"vfmsub.") ||
369 Name.starts_with(
"vfmsubadd.") ||
370 Name.starts_with(
"vfnmsub."));
372 if (Name.consume_front(
"maskz."))
374 return (Name.starts_with(
"pternlog.") ||
375 Name.starts_with(
"vfmadd.") ||
376 Name.starts_with(
"vfmaddsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermt2var.") ||
382 Name.starts_with(
"vpmadd52") ||
383 Name.starts_with(
"vpshldv.") ||
384 Name.starts_with(
"vpshrdv."));
387 return (Name ==
"movntdqa" ||
388 Name ==
"pmul.dq.512" ||
389 Name ==
"pmulu.dq.512" ||
390 Name.starts_with(
"broadcastm") ||
391 Name.starts_with(
"cmp.p") ||
392 Name.starts_with(
"cvtb2mask.") ||
393 Name.starts_with(
"cvtd2mask.") ||
394 Name.starts_with(
"cvtmask2") ||
395 Name.starts_with(
"cvtq2mask.") ||
396 Name ==
"cvtusi2sd" ||
397 Name.starts_with(
"cvtw2mask.") ||
402 Name ==
"kortestc.w" ||
403 Name ==
"kortestz.w" ||
404 Name.starts_with(
"kunpck") ||
407 Name.starts_with(
"padds.") ||
408 Name.starts_with(
"pbroadcast") ||
409 Name.starts_with(
"prol") ||
410 Name.starts_with(
"pror") ||
411 Name.starts_with(
"psll.dq") ||
412 Name.starts_with(
"psrl.dq") ||
413 Name.starts_with(
"psubs.") ||
414 Name.starts_with(
"ptestm") ||
415 Name.starts_with(
"ptestnm") ||
416 Name.starts_with(
"storent.") ||
417 Name.starts_with(
"vbroadcast.s") ||
418 Name.starts_with(
"vpshld.") ||
419 Name.starts_with(
"vpshrd."));
422 if (Name.consume_front(
"fma."))
423 return (Name.starts_with(
"vfmadd.") ||
424 Name.starts_with(
"vfmsub.") ||
425 Name.starts_with(
"vfmsubadd.") ||
426 Name.starts_with(
"vfnmadd.") ||
427 Name.starts_with(
"vfnmsub."));
429 if (Name.consume_front(
"fma4."))
430 return Name.starts_with(
"vfmadd.s");
432 if (Name.consume_front(
"sse."))
433 return (Name ==
"add.ss" ||
434 Name ==
"cvtsi2ss" ||
435 Name ==
"cvtsi642ss" ||
438 Name.starts_with(
"sqrt.p") ||
440 Name.starts_with(
"storeu.") ||
443 if (Name.consume_front(
"sse2."))
444 return (Name ==
"add.sd" ||
445 Name ==
"cvtdq2pd" ||
446 Name ==
"cvtdq2ps" ||
447 Name ==
"cvtps2pd" ||
448 Name ==
"cvtsi2sd" ||
449 Name ==
"cvtsi642sd" ||
450 Name ==
"cvtss2sd" ||
453 Name.starts_with(
"padds.") ||
454 Name.starts_with(
"paddus.") ||
455 Name.starts_with(
"pcmpeq.") ||
456 Name.starts_with(
"pcmpgt.") ||
461 Name ==
"pmulu.dq" ||
462 Name.starts_with(
"pshuf") ||
463 Name.starts_with(
"psll.dq") ||
464 Name.starts_with(
"psrl.dq") ||
465 Name.starts_with(
"psubs.") ||
466 Name.starts_with(
"psubus.") ||
467 Name.starts_with(
"sqrt.p") ||
469 Name ==
"storel.dq" ||
470 Name.starts_with(
"storeu.") ||
473 if (Name.consume_front(
"sse41."))
474 return (Name.starts_with(
"blendp") ||
475 Name ==
"movntdqa" ||
485 Name.starts_with(
"pmovsx") ||
486 Name.starts_with(
"pmovzx") ||
489 if (Name.consume_front(
"sse42."))
490 return Name ==
"crc32.64.8";
492 if (Name.consume_front(
"sse4a."))
493 return Name.starts_with(
"movnt.");
495 if (Name.consume_front(
"ssse3."))
496 return (Name ==
"pabs.b.128" ||
497 Name ==
"pabs.d.128" ||
498 Name ==
"pabs.w.128");
500 if (Name.consume_front(
"xop."))
501 return (Name ==
"vpcmov" ||
502 Name ==
"vpcmov.256" ||
503 Name.starts_with(
"vpcom") ||
504 Name.starts_with(
"vprot"));
506 return (Name ==
"addcarry.u32" ||
507 Name ==
"addcarry.u64" ||
508 Name ==
"addcarryx.u32" ||
509 Name ==
"addcarryx.u64" ||
510 Name ==
"subborrow.u32" ||
511 Name ==
"subborrow.u64" ||
512 Name.starts_with(
"vcvtph2ps."));
518 if (!Name.consume_front(
"x86."))
526 if (Name ==
"rdtscp") {
528 if (
F->getFunctionType()->getNumParams() == 0)
533 Intrinsic::x86_rdtscp);
540 if (Name.consume_front(
"sse41.ptest")) {
542 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
543 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
544 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
557 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
558 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
559 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
560 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
561 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
562 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
567 if (Name.consume_front(
"avx512.")) {
568 if (Name.consume_front(
"mask.cmp.")) {
571 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
572 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
573 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
574 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
575 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
576 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
580 }
else if (Name.starts_with(
"vpdpbusd.") ||
581 Name.starts_with(
"vpdpbusds.")) {
584 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
585 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
586 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
587 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
588 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
589 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
597 if (Name.consume_front(
"avx512bf16.")) {
600 .
Case(
"cvtne2ps2bf16.128",
601 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
602 .
Case(
"cvtne2ps2bf16.256",
603 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
604 .
Case(
"cvtne2ps2bf16.512",
605 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
606 .
Case(
"mask.cvtneps2bf16.128",
607 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
608 .
Case(
"cvtneps2bf16.256",
609 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
610 .
Case(
"cvtneps2bf16.512",
611 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
618 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
619 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
620 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
627 if (Name.consume_front(
"xop.")) {
629 if (Name.starts_with(
"vpermil2")) {
632 auto Idx =
F->getFunctionType()->getParamType(2);
633 if (Idx->isFPOrFPVectorTy()) {
634 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
635 unsigned EltSize = Idx->getScalarSizeInBits();
636 if (EltSize == 64 && IdxSize == 128)
637 ID = Intrinsic::x86_xop_vpermil2pd;
638 else if (EltSize == 32 && IdxSize == 128)
639 ID = Intrinsic::x86_xop_vpermil2ps;
640 else if (EltSize == 64 && IdxSize == 256)
641 ID = Intrinsic::x86_xop_vpermil2pd_256;
643 ID = Intrinsic::x86_xop_vpermil2ps_256;
645 }
else if (
F->arg_size() == 2)
648 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
649 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
660 if (Name ==
"seh.recoverfp") {
662 Intrinsic::eh_recoverfp);
674 if (Name.starts_with(
"rbit")) {
677 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
681 if (Name ==
"thread.pointer") {
684 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
688 bool Neon = Name.consume_front(
"neon.");
693 if (Name.consume_front(
"bfdot.")) {
697 .
Cases(
"v2f32.v8i8",
"v4f32.v16i8",
702 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
703 assert((OperandWidth == 64 || OperandWidth == 128) &&
704 "Unexpected operand width");
706 std::array<Type *, 2> Tys{
717 if (Name.consume_front(
"bfm")) {
719 if (Name.consume_back(
".v4f32.v16i8")) {
765 F->arg_begin()->getType());
769 if (Name.consume_front(
"vst")) {
771 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
775 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
776 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
779 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
780 Intrinsic::arm_neon_vst4lane};
782 auto fArgs =
F->getFunctionType()->params();
783 Type *Tys[] = {fArgs[0], fArgs[1]};
786 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
789 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
798 if (Name.consume_front(
"mve.")) {
800 if (Name ==
"vctp64") {
810 if (Name.starts_with(
"vrintn.v")) {
812 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
817 if (Name.consume_back(
".v4i1")) {
819 if (Name.consume_back(
".predicated.v2i64.v4i32"))
821 return Name ==
"mull.int" || Name ==
"vqdmull";
823 if (Name.consume_back(
".v2i64")) {
825 bool IsGather = Name.consume_front(
"vldr.gather.");
826 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
827 if (Name.consume_front(
"base.")) {
829 Name.consume_front(
"wb.");
832 return Name ==
"predicated.v2i64";
835 if (Name.consume_front(
"offset.predicated."))
836 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
837 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
850 if (Name.consume_front(
"cde.vcx")) {
852 if (Name.consume_back(
".predicated.v2i64.v4i1"))
854 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
855 Name ==
"3q" || Name ==
"3qa";
869 F->arg_begin()->getType());
873 if (Name.starts_with(
"addp")) {
875 if (
F->arg_size() != 2)
878 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
880 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
886 if (Name.starts_with(
"bfcvt")) {
893 if (Name.consume_front(
"sve.")) {
895 if (Name.consume_front(
"bf")) {
896 if (Name.consume_back(
".lane")) {
900 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
901 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
902 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
914 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
919 if (Name.consume_front(
"addqv")) {
921 if (!
F->getReturnType()->isFPOrFPVectorTy())
924 auto Args =
F->getFunctionType()->params();
925 Type *Tys[] = {
F->getReturnType(), Args[1]};
927 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
931 if (Name.consume_front(
"ld")) {
933 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
934 if (LdRegex.
match(Name)) {
941 Intrinsic::aarch64_sve_ld2_sret,
942 Intrinsic::aarch64_sve_ld3_sret,
943 Intrinsic::aarch64_sve_ld4_sret,
946 LoadIDs[Name[0] -
'2'], Ty);
952 if (Name.consume_front(
"tuple.")) {
954 if (Name.starts_with(
"get")) {
956 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
958 F->getParent(), Intrinsic::vector_extract, Tys);
962 if (Name.starts_with(
"set")) {
964 auto Args =
F->getFunctionType()->params();
965 Type *Tys[] = {Args[0], Args[2], Args[1]};
967 F->getParent(), Intrinsic::vector_insert, Tys);
971 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
972 if (CreateTupleRegex.
match(Name)) {
974 auto Args =
F->getFunctionType()->params();
975 Type *Tys[] = {
F->getReturnType(), Args[1]};
977 F->getParent(), Intrinsic::vector_insert, Tys);
990 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
994 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
996 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
998 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
999 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1000 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1001 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1002 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1003 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1012 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1026 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1027 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1037 if (Name.consume_front(
"mapa.shared.cluster"))
1038 if (
F->getReturnType()->getPointerAddressSpace() ==
1040 return Intrinsic::nvvm_mapa_shared_cluster;
1042 if (Name.consume_front(
"cp.async.bulk.")) {
1045 .
Case(
"global.to.shared.cluster",
1046 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1047 .
Case(
"shared.cta.to.cluster",
1048 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1052 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1061 if (Name.consume_front(
"fma.rn."))
1063 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1064 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1065 .
Case(
"ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
1066 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
1067 .
Case(
"ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
1068 .
Case(
"ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
1069 .
Case(
"ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
1070 .
Case(
"ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
1071 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1072 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1073 .
Case(
"sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
1074 .
Case(
"sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
1077 if (Name.consume_front(
"fmax."))
1079 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1080 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1081 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1082 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1083 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1084 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1085 .
Case(
"ftz.nan.xorsign.abs.bf16",
1086 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1087 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1088 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1089 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1090 .
Case(
"ftz.xorsign.abs.bf16x2",
1091 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1092 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1093 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1094 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1095 .
Case(
"nan.xorsign.abs.bf16x2",
1096 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1097 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1098 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1101 if (Name.consume_front(
"fmin."))
1103 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1104 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1105 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1106 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1107 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1108 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1109 .
Case(
"ftz.nan.xorsign.abs.bf16",
1110 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1111 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1112 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1113 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1114 .
Case(
"ftz.xorsign.abs.bf16x2",
1115 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1116 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1117 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1118 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1119 .
Case(
"nan.xorsign.abs.bf16x2",
1120 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1121 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1122 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1125 if (Name.consume_front(
"neg."))
1127 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1128 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1135 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1136 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1137 Name.consume_front(
"param");
1141 bool CanUpgradeDebugIntrinsicsToRecords) {
1142 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1147 if (!Name.consume_front(
"llvm.") || Name.empty())
1153 bool IsArm = Name.consume_front(
"arm.");
1154 if (IsArm || Name.consume_front(
"aarch64.")) {
1160 if (Name.consume_front(
"amdgcn.")) {
1161 if (Name ==
"alignbit") {
1164 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1168 if (Name.consume_front(
"atomic.")) {
1169 if (Name.starts_with(
"inc") || Name.starts_with(
"dec")) {
1178 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1179 Name.consume_front(
"flat.atomic.")) {
1180 if (Name.starts_with(
"fadd") ||
1182 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1183 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1191 if (Name.starts_with(
"ldexp.")) {
1194 F->getParent(), Intrinsic::ldexp,
1195 {F->getReturnType(), F->getArg(1)->getType()});
1204 if (
F->arg_size() == 1) {
1212 F->arg_begin()->getType());
1217 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1220 Intrinsic::coro_end);
1227 if (Name.consume_front(
"dbg.")) {
1229 if (CanUpgradeDebugIntrinsicsToRecords) {
1230 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1231 Name ==
"declare" || Name ==
"label") {
1240 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1243 Intrinsic::dbg_value);
1250 if (Name.consume_front(
"experimental.vector.")) {
1256 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1257 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1258 .
StartsWith(
"splice.", Intrinsic::vector_splice)
1259 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1260 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1261 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1263 Intrinsic::vector_partial_reduce_add)
1266 const auto *FT =
F->getFunctionType();
1268 if (
ID == Intrinsic::vector_extract ||
1269 ID == Intrinsic::vector_interleave2)
1272 if (
ID != Intrinsic::vector_interleave2)
1274 if (
ID == Intrinsic::vector_insert ||
1275 ID == Intrinsic::vector_partial_reduce_add)
1283 if (Name.consume_front(
"reduce.")) {
1285 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1286 if (R.match(Name, &
Groups))
1288 .
Case(
"add", Intrinsic::vector_reduce_add)
1289 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1290 .
Case(
"and", Intrinsic::vector_reduce_and)
1291 .
Case(
"or", Intrinsic::vector_reduce_or)
1292 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1293 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1294 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1295 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1296 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1297 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1298 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1303 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1308 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1309 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1314 auto Args =
F->getFunctionType()->params();
1316 {Args[V2 ? 1 : 0]});
1323 if (Name.consume_front(
"experimental.stepvector.")) {
1327 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1332 if (Name.starts_with(
"flt.rounds")) {
1335 Intrinsic::get_rounding);
1340 if (Name.starts_with(
"invariant.group.barrier")) {
1342 auto Args =
F->getFunctionType()->params();
1343 Type* ObjectPtr[1] = {Args[0]};
1346 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1351 if ((Name.starts_with(
"lifetime.start") ||
1352 Name.starts_with(
"lifetime.end")) &&
1353 F->arg_size() == 2) {
1355 ? Intrinsic::lifetime_start
1356 : Intrinsic::lifetime_end;
1359 F->getArg(0)->getType());
1368 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1369 .StartsWith(
"memmove.", Intrinsic::memmove)
1371 if (
F->arg_size() == 5) {
1375 F->getFunctionType()->params().slice(0, 3);
1381 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1384 const auto *FT =
F->getFunctionType();
1385 Type *ParamTypes[2] = {
1386 FT->getParamType(0),
1390 Intrinsic::memset, ParamTypes);
1396 if (Name.consume_front(
"nvvm.")) {
1398 if (
F->arg_size() == 1) {
1401 .
Cases(
"brev32",
"brev64", Intrinsic::bitreverse)
1402 .
Case(
"clz.i", Intrinsic::ctlz)
1403 .
Case(
"popc.i", Intrinsic::ctpop)
1407 {F->getReturnType()});
1413 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1441 bool Expand =
false;
1442 if (Name.consume_front(
"abs."))
1445 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1446 else if (Name.consume_front(
"fabs."))
1448 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1449 else if (Name.consume_front(
"max.") || Name.consume_front(
"min."))
1451 Expand = Name ==
"s" || Name ==
"i" || Name ==
"ll" || Name ==
"us" ||
1452 Name ==
"ui" || Name ==
"ull";
1453 else if (Name.consume_front(
"atomic.load."))
1462 else if (Name.consume_front(
"bitcast."))
1465 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1466 else if (Name.consume_front(
"rotate."))
1468 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1469 else if (Name.consume_front(
"ptr.gen.to."))
1472 else if (Name.consume_front(
"ptr."))
1475 else if (Name.consume_front(
"ldg.global."))
1477 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1478 Name.starts_with(
"p."));
1481 .
Case(
"barrier0",
true)
1482 .
Case(
"barrier.n",
true)
1483 .
Case(
"barrier.sync.cnt",
true)
1484 .
Case(
"barrier.sync",
true)
1485 .
Case(
"barrier",
true)
1486 .
Case(
"bar.sync",
true)
1487 .
Case(
"clz.ll",
true)
1488 .
Case(
"popc.ll",
true)
1490 .
Case(
"swap.lo.hi.b64",
true)
1491 .
Case(
"tanh.approx.f32",
true)
1503 if (Name.starts_with(
"objectsize.")) {
1504 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1505 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1508 Intrinsic::objectsize, Tys);
1515 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1518 F->getParent(), Intrinsic::ptr_annotation,
1519 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1525 if (Name.consume_front(
"riscv.")) {
1528 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1529 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1530 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1531 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1534 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1547 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1548 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1557 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1558 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1559 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1560 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1565 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1577 if (Name ==
"stackprotectorcheck") {
1584 if (Name ==
"thread.pointer") {
1586 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1592 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1595 F->getParent(), Intrinsic::var_annotation,
1596 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1603 if (Name.consume_front(
"wasm.")) {
1606 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1607 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1608 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1613 F->getReturnType());
1617 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1619 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1621 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1640 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1649 auto *FT =
F->getFunctionType();
1652 std::string
Name =
F->getName().str();
1655 Name,
F->getParent());
1666 if (Result != std::nullopt) {
1679 bool CanUpgradeDebugIntrinsicsToRecords) {
1699 GV->
getName() ==
"llvm.global_dtors")) ||
1714 unsigned N =
Init->getNumOperands();
1715 std::vector<Constant *> NewCtors(
N);
1716 for (
unsigned i = 0; i !=
N; ++i) {
1719 Ctor->getAggregateElement(1),
1733 unsigned NumElts = ResultTy->getNumElements() * 8;
1737 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1747 for (
unsigned l = 0; l != NumElts; l += 16)
1748 for (
unsigned i = 0; i != 16; ++i) {
1749 unsigned Idx = NumElts + i - Shift;
1751 Idx -= NumElts - 16;
1752 Idxs[l + i] = Idx + l;
1755 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1759 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1767 unsigned NumElts = ResultTy->getNumElements() * 8;
1771 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1781 for (
unsigned l = 0; l != NumElts; l += 16)
1782 for (
unsigned i = 0; i != 16; ++i) {
1783 unsigned Idx = i + Shift;
1785 Idx += NumElts - 16;
1786 Idxs[l + i] = Idx + l;
1789 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
1793 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1801 Mask = Builder.CreateBitCast(Mask, MaskTy);
1807 for (
unsigned i = 0; i != NumElts; ++i)
1809 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
1820 if (
C->isAllOnesValue())
1825 return Builder.CreateSelect(Mask, Op0, Op1);
1832 if (
C->isAllOnesValue())
1836 Mask->getType()->getIntegerBitWidth());
1837 Mask = Builder.CreateBitCast(Mask, MaskTy);
1838 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
1839 return Builder.CreateSelect(Mask, Op0, Op1);
1852 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
1853 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
1858 ShiftVal &= (NumElts - 1);
1867 if (ShiftVal > 16) {
1875 for (
unsigned l = 0; l < NumElts; l += 16) {
1876 for (
unsigned i = 0; i != 16; ++i) {
1877 unsigned Idx = ShiftVal + i;
1878 if (!IsVALIGN && Idx >= 16)
1879 Idx += NumElts - 16;
1880 Indices[l + i] = Idx + l;
1885 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
1891 bool ZeroMask,
bool IndexForm) {
1894 unsigned EltWidth = Ty->getScalarSizeInBits();
1895 bool IsFloat = Ty->isFPOrFPVectorTy();
1897 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1898 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1899 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1900 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1901 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1902 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1903 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1904 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1905 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1906 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1907 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1908 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1909 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1910 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1911 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1912 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1913 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1914 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1915 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1916 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1917 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1918 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1919 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1920 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1921 else if (VecWidth == 128 && EltWidth == 16)
1922 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1923 else if (VecWidth == 256 && EltWidth == 16)
1924 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1925 else if (VecWidth == 512 && EltWidth == 16)
1926 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1927 else if (VecWidth == 128 && EltWidth == 8)
1928 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1929 else if (VecWidth == 256 && EltWidth == 8)
1930 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1931 else if (VecWidth == 512 && EltWidth == 8)
1932 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1943 Value *V = Builder.CreateIntrinsic(IID, Args);
1955 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1966 bool IsRotateRight) {
1976 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
1977 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1980 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1981 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2026 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2031 bool IsShiftRight,
bool ZeroMask) {
2045 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2046 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2049 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2050 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2065 const Align Alignment =
2067 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2072 if (
C->isAllOnesValue())
2073 return Builder.CreateAlignedStore(
Data,
Ptr, Alignment);
2078 return Builder.CreateMaskedStore(
Data,
Ptr, Alignment, Mask);
2084 const Align Alignment =
2093 if (
C->isAllOnesValue())
2094 return Builder.CreateAlignedLoad(ValTy,
Ptr, Alignment);
2099 return Builder.CreateMaskedLoad(ValTy,
Ptr, Alignment, Mask, Passthru);
2105 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2106 {Op0, Builder.getInt1(
false)});
2121 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2122 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2123 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2124 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2125 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2128 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2129 LHS = Builder.CreateAnd(
LHS, Mask);
2130 RHS = Builder.CreateAnd(
RHS, Mask);
2147 if (!
C || !
C->isAllOnesValue())
2148 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2153 for (
unsigned i = 0; i != NumElts; ++i)
2155 for (
unsigned i = NumElts; i != 8; ++i)
2156 Indices[i] = NumElts + i % NumElts;
2157 Vec = Builder.CreateShuffleVector(Vec,
2161 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2165 unsigned CC,
bool Signed) {
2173 }
else if (CC == 7) {
2209 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2210 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2212 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2213 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2222 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2228 Name = Name.substr(12);
2233 if (Name.starts_with(
"max.p")) {
2234 if (VecWidth == 128 && EltWidth == 32)
2235 IID = Intrinsic::x86_sse_max_ps;
2236 else if (VecWidth == 128 && EltWidth == 64)
2237 IID = Intrinsic::x86_sse2_max_pd;
2238 else if (VecWidth == 256 && EltWidth == 32)
2239 IID = Intrinsic::x86_avx_max_ps_256;
2240 else if (VecWidth == 256 && EltWidth == 64)
2241 IID = Intrinsic::x86_avx_max_pd_256;
2244 }
else if (Name.starts_with(
"min.p")) {
2245 if (VecWidth == 128 && EltWidth == 32)
2246 IID = Intrinsic::x86_sse_min_ps;
2247 else if (VecWidth == 128 && EltWidth == 64)
2248 IID = Intrinsic::x86_sse2_min_pd;
2249 else if (VecWidth == 256 && EltWidth == 32)
2250 IID = Intrinsic::x86_avx_min_ps_256;
2251 else if (VecWidth == 256 && EltWidth == 64)
2252 IID = Intrinsic::x86_avx_min_pd_256;
2255 }
else if (Name.starts_with(
"pshuf.b.")) {
2256 if (VecWidth == 128)
2257 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2258 else if (VecWidth == 256)
2259 IID = Intrinsic::x86_avx2_pshuf_b;
2260 else if (VecWidth == 512)
2261 IID = Intrinsic::x86_avx512_pshuf_b_512;
2264 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2265 if (VecWidth == 128)
2266 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2267 else if (VecWidth == 256)
2268 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2269 else if (VecWidth == 512)
2270 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2273 }
else if (Name.starts_with(
"pmulh.w.")) {
2274 if (VecWidth == 128)
2275 IID = Intrinsic::x86_sse2_pmulh_w;
2276 else if (VecWidth == 256)
2277 IID = Intrinsic::x86_avx2_pmulh_w;
2278 else if (VecWidth == 512)
2279 IID = Intrinsic::x86_avx512_pmulh_w_512;
2282 }
else if (Name.starts_with(
"pmulhu.w.")) {
2283 if (VecWidth == 128)
2284 IID = Intrinsic::x86_sse2_pmulhu_w;
2285 else if (VecWidth == 256)
2286 IID = Intrinsic::x86_avx2_pmulhu_w;
2287 else if (VecWidth == 512)
2288 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2291 }
else if (Name.starts_with(
"pmaddw.d.")) {
2292 if (VecWidth == 128)
2293 IID = Intrinsic::x86_sse2_pmadd_wd;
2294 else if (VecWidth == 256)
2295 IID = Intrinsic::x86_avx2_pmadd_wd;
2296 else if (VecWidth == 512)
2297 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2300 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2301 if (VecWidth == 128)
2302 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2303 else if (VecWidth == 256)
2304 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2305 else if (VecWidth == 512)
2306 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2309 }
else if (Name.starts_with(
"packsswb.")) {
2310 if (VecWidth == 128)
2311 IID = Intrinsic::x86_sse2_packsswb_128;
2312 else if (VecWidth == 256)
2313 IID = Intrinsic::x86_avx2_packsswb;
2314 else if (VecWidth == 512)
2315 IID = Intrinsic::x86_avx512_packsswb_512;
2318 }
else if (Name.starts_with(
"packssdw.")) {
2319 if (VecWidth == 128)
2320 IID = Intrinsic::x86_sse2_packssdw_128;
2321 else if (VecWidth == 256)
2322 IID = Intrinsic::x86_avx2_packssdw;
2323 else if (VecWidth == 512)
2324 IID = Intrinsic::x86_avx512_packssdw_512;
2327 }
else if (Name.starts_with(
"packuswb.")) {
2328 if (VecWidth == 128)
2329 IID = Intrinsic::x86_sse2_packuswb_128;
2330 else if (VecWidth == 256)
2331 IID = Intrinsic::x86_avx2_packuswb;
2332 else if (VecWidth == 512)
2333 IID = Intrinsic::x86_avx512_packuswb_512;
2336 }
else if (Name.starts_with(
"packusdw.")) {
2337 if (VecWidth == 128)
2338 IID = Intrinsic::x86_sse41_packusdw;
2339 else if (VecWidth == 256)
2340 IID = Intrinsic::x86_avx2_packusdw;
2341 else if (VecWidth == 512)
2342 IID = Intrinsic::x86_avx512_packusdw_512;
2345 }
else if (Name.starts_with(
"vpermilvar.")) {
2346 if (VecWidth == 128 && EltWidth == 32)
2347 IID = Intrinsic::x86_avx_vpermilvar_ps;
2348 else if (VecWidth == 128 && EltWidth == 64)
2349 IID = Intrinsic::x86_avx_vpermilvar_pd;
2350 else if (VecWidth == 256 && EltWidth == 32)
2351 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2352 else if (VecWidth == 256 && EltWidth == 64)
2353 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2354 else if (VecWidth == 512 && EltWidth == 32)
2355 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2356 else if (VecWidth == 512 && EltWidth == 64)
2357 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2360 }
else if (Name ==
"cvtpd2dq.256") {
2361 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2362 }
else if (Name ==
"cvtpd2ps.256") {
2363 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2364 }
else if (Name ==
"cvttpd2dq.256") {
2365 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2366 }
else if (Name ==
"cvttps2dq.128") {
2367 IID = Intrinsic::x86_sse2_cvttps2dq;
2368 }
else if (Name ==
"cvttps2dq.256") {
2369 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2370 }
else if (Name.starts_with(
"permvar.")) {
2372 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2373 IID = Intrinsic::x86_avx2_permps;
2374 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2375 IID = Intrinsic::x86_avx2_permd;
2376 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2377 IID = Intrinsic::x86_avx512_permvar_df_256;
2378 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2379 IID = Intrinsic::x86_avx512_permvar_di_256;
2380 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2381 IID = Intrinsic::x86_avx512_permvar_sf_512;
2382 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2383 IID = Intrinsic::x86_avx512_permvar_si_512;
2384 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2385 IID = Intrinsic::x86_avx512_permvar_df_512;
2386 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2387 IID = Intrinsic::x86_avx512_permvar_di_512;
2388 else if (VecWidth == 128 && EltWidth == 16)
2389 IID = Intrinsic::x86_avx512_permvar_hi_128;
2390 else if (VecWidth == 256 && EltWidth == 16)
2391 IID = Intrinsic::x86_avx512_permvar_hi_256;
2392 else if (VecWidth == 512 && EltWidth == 16)
2393 IID = Intrinsic::x86_avx512_permvar_hi_512;
2394 else if (VecWidth == 128 && EltWidth == 8)
2395 IID = Intrinsic::x86_avx512_permvar_qi_128;
2396 else if (VecWidth == 256 && EltWidth == 8)
2397 IID = Intrinsic::x86_avx512_permvar_qi_256;
2398 else if (VecWidth == 512 && EltWidth == 8)
2399 IID = Intrinsic::x86_avx512_permvar_qi_512;
2402 }
else if (Name.starts_with(
"dbpsadbw.")) {
2403 if (VecWidth == 128)
2404 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2405 else if (VecWidth == 256)
2406 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2407 else if (VecWidth == 512)
2408 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2411 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2412 if (VecWidth == 128)
2413 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2414 else if (VecWidth == 256)
2415 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2416 else if (VecWidth == 512)
2417 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2420 }
else if (Name.starts_with(
"conflict.")) {
2421 if (Name[9] ==
'd' && VecWidth == 128)
2422 IID = Intrinsic::x86_avx512_conflict_d_128;
2423 else if (Name[9] ==
'd' && VecWidth == 256)
2424 IID = Intrinsic::x86_avx512_conflict_d_256;
2425 else if (Name[9] ==
'd' && VecWidth == 512)
2426 IID = Intrinsic::x86_avx512_conflict_d_512;
2427 else if (Name[9] ==
'q' && VecWidth == 128)
2428 IID = Intrinsic::x86_avx512_conflict_q_128;
2429 else if (Name[9] ==
'q' && VecWidth == 256)
2430 IID = Intrinsic::x86_avx512_conflict_q_256;
2431 else if (Name[9] ==
'q' && VecWidth == 512)
2432 IID = Intrinsic::x86_avx512_conflict_q_512;
2435 }
else if (Name.starts_with(
"pavg.")) {
2436 if (Name[5] ==
'b' && VecWidth == 128)
2437 IID = Intrinsic::x86_sse2_pavg_b;
2438 else if (Name[5] ==
'b' && VecWidth == 256)
2439 IID = Intrinsic::x86_avx2_pavg_b;
2440 else if (Name[5] ==
'b' && VecWidth == 512)
2441 IID = Intrinsic::x86_avx512_pavg_b_512;
2442 else if (Name[5] ==
'w' && VecWidth == 128)
2443 IID = Intrinsic::x86_sse2_pavg_w;
2444 else if (Name[5] ==
'w' && VecWidth == 256)
2445 IID = Intrinsic::x86_avx2_pavg_w;
2446 else if (Name[5] ==
'w' && VecWidth == 512)
2447 IID = Intrinsic::x86_avx512_pavg_w_512;
2456 Rep = Builder.CreateIntrinsic(IID, Args);
2467 if (AsmStr->find(
"mov\tfp") == 0 &&
2468 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2469 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2470 AsmStr->replace(Pos, 1,
";");
2476 Value *Rep =
nullptr;
2478 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2480 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2481 Value *Cmp = Builder.CreateICmpSGE(
2483 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2484 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2485 Type *Ty = (Name ==
"abs.bf16")
2489 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2490 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2491 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2492 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2493 : Intrinsic::nvvm_fabs;
2494 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2495 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2496 Name.starts_with(
"atomic.load.add.f64.p")) {
2501 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2502 Name.starts_with(
"atomic.load.dec.32.p")) {
2509 }
else if (Name.consume_front(
"max.") &&
2510 (Name ==
"s" || Name ==
"i" || Name ==
"ll" || Name ==
"us" ||
2511 Name ==
"ui" || Name ==
"ull")) {
2514 Value *Cmp = Name.starts_with(
"u")
2515 ? Builder.CreateICmpUGE(Arg0, Arg1,
"max.cond")
2516 : Builder.CreateICmpSGE(Arg0, Arg1,
"max.cond");
2517 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1,
"max");
2518 }
else if (Name.consume_front(
"min.") &&
2519 (Name ==
"s" || Name ==
"i" || Name ==
"ll" || Name ==
"us" ||
2520 Name ==
"ui" || Name ==
"ull")) {
2523 Value *Cmp = Name.starts_with(
"u")
2524 ? Builder.CreateICmpULE(Arg0, Arg1,
"min.cond")
2525 : Builder.CreateICmpSLE(Arg0, Arg1,
"min.cond");
2526 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1,
"min");
2527 }
else if (Name ==
"clz.ll") {
2530 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2531 {Arg, Builder.getFalse()},
2533 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2534 }
else if (Name ==
"popc.ll") {
2538 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2539 Arg,
nullptr,
"ctpop");
2540 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2541 }
else if (Name ==
"h2f") {
2542 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2545 }
else if (Name.consume_front(
"bitcast.") &&
2546 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2549 }
else if (Name ==
"rotate.b32") {
2552 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2553 {Arg, Arg, ShiftAmt});
2554 }
else if (Name ==
"rotate.b64") {
2558 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2559 {Arg, Arg, ZExtShiftAmt});
2560 }
else if (Name ==
"rotate.right.b64") {
2564 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2565 {Arg, Arg, ZExtShiftAmt});
2566 }
else if (Name ==
"swap.lo.hi.b64") {
2569 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2570 {Arg, Arg, Builder.getInt64(32)});
2571 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2574 Name.starts_with(
".to.gen"))) {
2576 }
else if (Name.consume_front(
"ldg.global")) {
2580 Value *ASC = Builder.CreateAddrSpaceCast(
Ptr, Builder.getPtrTy(1));
2583 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2585 }
else if (Name ==
"tanh.approx.f32") {
2589 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2591 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2593 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2594 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2596 }
else if (Name ==
"barrier") {
2597 Rep = Builder.CreateIntrinsic(
2598 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2600 }
else if (Name ==
"barrier.sync") {
2601 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2603 }
else if (Name ==
"barrier.sync.cnt") {
2604 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2609 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2619 ? Builder.CreateBitCast(Arg, NewType)
2622 Rep = Builder.CreateCall(NewFn, Args);
2623 if (
F->getReturnType()->isIntegerTy())
2624 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2634 Value *Rep =
nullptr;
2636 if (Name.starts_with(
"sse4a.movnt.")) {
2648 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2651 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2652 }
else if (Name.starts_with(
"avx.movnt.") ||
2653 Name.starts_with(
"avx512.storent.")) {
2665 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2666 }
else if (Name ==
"sse2.storel.dq") {
2671 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2672 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2673 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2674 }
else if (Name.starts_with(
"sse.storeu.") ||
2675 Name.starts_with(
"sse2.storeu.") ||
2676 Name.starts_with(
"avx.storeu.")) {
2679 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2680 }
else if (Name ==
"avx512.mask.store.ss") {
2684 }
else if (Name.starts_with(
"avx512.mask.store")) {
2686 bool Aligned = Name[17] !=
'u';
2689 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2692 bool CmpEq = Name[9] ==
'e';
2695 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2696 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2703 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2704 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2706 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2707 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2708 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2709 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2710 Name.starts_with(
"sse2.sqrt.p") ||
2711 Name.starts_with(
"sse.sqrt.p")) {
2712 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2713 {CI->getArgOperand(0)});
2714 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2718 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2719 : Intrinsic::x86_avx512_sqrt_pd_512;
2722 Rep = Builder.CreateIntrinsic(IID, Args);
2724 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2725 {CI->getArgOperand(0)});
2729 }
else if (Name.starts_with(
"avx512.ptestm") ||
2730 Name.starts_with(
"avx512.ptestnm")) {
2734 Rep = Builder.CreateAnd(Op0, Op1);
2740 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2742 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2745 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2748 }
else if (Name.starts_with(
"avx512.kunpck")) {
2753 for (
unsigned i = 0; i != NumElts; ++i)
2762 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2763 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2764 }
else if (Name ==
"avx512.kand.w") {
2767 Rep = Builder.CreateAnd(
LHS,
RHS);
2768 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2769 }
else if (Name ==
"avx512.kandn.w") {
2772 LHS = Builder.CreateNot(
LHS);
2773 Rep = Builder.CreateAnd(
LHS,
RHS);
2774 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2775 }
else if (Name ==
"avx512.kor.w") {
2778 Rep = Builder.CreateOr(
LHS,
RHS);
2779 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2780 }
else if (Name ==
"avx512.kxor.w") {
2783 Rep = Builder.CreateXor(
LHS,
RHS);
2784 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2785 }
else if (Name ==
"avx512.kxnor.w") {
2788 LHS = Builder.CreateNot(
LHS);
2789 Rep = Builder.CreateXor(
LHS,
RHS);
2790 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2791 }
else if (Name ==
"avx512.knot.w") {
2793 Rep = Builder.CreateNot(Rep);
2794 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2795 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
2798 Rep = Builder.CreateOr(
LHS,
RHS);
2799 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2801 if (Name[14] ==
'c')
2805 Rep = Builder.CreateICmpEQ(Rep,
C);
2806 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2807 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
2808 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
2809 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
2810 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
2813 ConstantInt::get(I32Ty, 0));
2815 ConstantInt::get(I32Ty, 0));
2817 if (Name.contains(
".add."))
2818 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2819 else if (Name.contains(
".sub."))
2820 EltOp = Builder.CreateFSub(Elt0, Elt1);
2821 else if (Name.contains(
".mul."))
2822 EltOp = Builder.CreateFMul(Elt0, Elt1);
2824 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2825 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
2826 ConstantInt::get(I32Ty, 0));
2827 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
2829 bool CmpEq = Name[16] ==
'e';
2831 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
2839 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2842 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2845 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2852 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
2857 if (VecWidth == 128 && EltWidth == 32)
2858 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2859 else if (VecWidth == 256 && EltWidth == 32)
2860 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2861 else if (VecWidth == 512 && EltWidth == 32)
2862 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2863 else if (VecWidth == 128 && EltWidth == 64)
2864 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2865 else if (VecWidth == 256 && EltWidth == 64)
2866 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2867 else if (VecWidth == 512 && EltWidth == 64)
2868 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2875 }
else if (Name.starts_with(
"avx512.cmp.p")) {
2877 Type *OpTy = Args[0]->getType();
2881 if (VecWidth == 128 && EltWidth == 32)
2882 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2883 else if (VecWidth == 256 && EltWidth == 32)
2884 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2885 else if (VecWidth == 512 && EltWidth == 32)
2886 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2887 else if (VecWidth == 128 && EltWidth == 64)
2888 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2889 else if (VecWidth == 256 && EltWidth == 64)
2890 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2891 else if (VecWidth == 512 && EltWidth == 64)
2892 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2897 if (VecWidth == 512)
2899 Args.push_back(Mask);
2901 Rep = Builder.CreateIntrinsic(IID, Args);
2902 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
2906 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
2909 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
2910 Name.starts_with(
"avx512.cvtw2mask.") ||
2911 Name.starts_with(
"avx512.cvtd2mask.") ||
2912 Name.starts_with(
"avx512.cvtq2mask.")) {
2917 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
2918 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
2919 Name.starts_with(
"avx512.mask.pabs")) {
2921 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
2922 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
2923 Name.starts_with(
"avx512.mask.pmaxs")) {
2925 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
2926 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
2927 Name.starts_with(
"avx512.mask.pmaxu")) {
2929 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
2930 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
2931 Name.starts_with(
"avx512.mask.pmins")) {
2933 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
2934 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
2935 Name.starts_with(
"avx512.mask.pminu")) {
2937 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
2938 Name ==
"avx512.pmulu.dq.512" ||
2939 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
2941 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
2942 Name ==
"avx512.pmul.dq.512" ||
2943 Name.starts_with(
"avx512.mask.pmul.dq.")) {
2945 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
2946 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
2951 }
else if (Name ==
"avx512.cvtusi2sd") {
2956 }
else if (Name ==
"sse2.cvtss2sd") {
2958 Rep = Builder.CreateFPExt(
2961 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
2962 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
2963 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
2964 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
2965 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
2966 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
2967 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
2968 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
2969 Name ==
"avx512.mask.cvtqq2ps.256" ||
2970 Name ==
"avx512.mask.cvtqq2ps.512" ||
2971 Name ==
"avx512.mask.cvtuqq2ps.256" ||
2972 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
2973 Name ==
"avx.cvt.ps2.pd.256" ||
2974 Name ==
"avx512.mask.cvtps2pd.128" ||
2975 Name ==
"avx512.mask.cvtps2pd.256") {
2980 unsigned NumDstElts = DstTy->getNumElements();
2982 assert(NumDstElts == 2 &&
"Unexpected vector size");
2983 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
2986 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2987 bool IsUnsigned = Name.contains(
"cvtu");
2989 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
2993 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2994 : Intrinsic::x86_avx512_sitofp_round;
2995 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2998 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
2999 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3005 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3006 Name.starts_with(
"vcvtph2ps.")) {
3010 unsigned NumDstElts = DstTy->getNumElements();
3011 if (NumDstElts != SrcTy->getNumElements()) {
3012 assert(NumDstElts == 4 &&
"Unexpected vector size");
3013 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3015 Rep = Builder.CreateBitCast(
3017 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3021 }
else if (Name.starts_with(
"avx512.mask.load")) {
3023 bool Aligned = Name[16] !=
'u';
3026 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3029 ResultTy->getNumElements());
3031 Rep = Builder.CreateIntrinsic(
3032 Intrinsic::masked_expandload, ResultTy,
3034 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3040 Rep = Builder.CreateIntrinsic(
3041 Intrinsic::masked_compressstore, ResultTy,
3043 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3044 Name.starts_with(
"avx512.mask.expand.")) {
3048 ResultTy->getNumElements());
3050 bool IsCompress = Name[12] ==
'c';
3051 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3052 : Intrinsic::x86_avx512_mask_expand;
3053 Rep = Builder.CreateIntrinsic(
3055 }
else if (Name.starts_with(
"xop.vpcom")) {
3057 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3058 Name.ends_with(
"uq"))
3060 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3061 Name.ends_with(
"d") || Name.ends_with(
"q"))
3070 Name = Name.substr(9);
3071 if (Name.starts_with(
"lt"))
3073 else if (Name.starts_with(
"le"))
3075 else if (Name.starts_with(
"gt"))
3077 else if (Name.starts_with(
"ge"))
3079 else if (Name.starts_with(
"eq"))
3081 else if (Name.starts_with(
"ne"))
3083 else if (Name.starts_with(
"false"))
3085 else if (Name.starts_with(
"true"))
3092 }
else if (Name.starts_with(
"xop.vpcmov")) {
3094 Value *NotSel = Builder.CreateNot(Sel);
3097 Rep = Builder.CreateOr(Sel0, Sel1);
3098 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3099 Name.starts_with(
"avx512.mask.prol")) {
3101 }
else if (Name.starts_with(
"avx512.pror") ||
3102 Name.starts_with(
"avx512.mask.pror")) {
3104 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3105 Name.starts_with(
"avx512.mask.vpshld") ||
3106 Name.starts_with(
"avx512.maskz.vpshld")) {
3107 bool ZeroMask = Name[11] ==
'z';
3109 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3110 Name.starts_with(
"avx512.mask.vpshrd") ||
3111 Name.starts_with(
"avx512.maskz.vpshrd")) {
3112 bool ZeroMask = Name[11] ==
'z';
3114 }
else if (Name ==
"sse42.crc32.64.8") {
3117 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3119 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3120 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3121 Name.starts_with(
"avx512.vbroadcast.s")) {
3124 Type *EltTy = VecTy->getElementType();
3125 unsigned EltNum = VecTy->getNumElements();
3129 for (
unsigned I = 0;
I < EltNum; ++
I)
3130 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3131 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3132 Name.starts_with(
"sse41.pmovzx") ||
3133 Name.starts_with(
"avx2.pmovsx") ||
3134 Name.starts_with(
"avx2.pmovzx") ||
3135 Name.starts_with(
"avx512.mask.pmovsx") ||
3136 Name.starts_with(
"avx512.mask.pmovzx")) {
3138 unsigned NumDstElts = DstTy->getNumElements();
3142 for (
unsigned i = 0; i != NumDstElts; ++i)
3147 bool DoSext = Name.contains(
"pmovsx");
3149 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3154 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3155 Name ==
"avx512.mask.pmov.qd.512" ||
3156 Name ==
"avx512.mask.pmov.wb.256" ||
3157 Name ==
"avx512.mask.pmov.wb.512") {
3162 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3163 Name ==
"avx2.vbroadcasti128") {
3169 if (NumSrcElts == 2)
3170 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3172 Rep = Builder.CreateShuffleVector(Load,
3174 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3175 Name.starts_with(
"avx512.mask.shuf.f")) {
3180 unsigned ControlBitsMask = NumLanes - 1;
3181 unsigned NumControlBits = NumLanes / 2;
3184 for (
unsigned l = 0; l != NumLanes; ++l) {
3185 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3187 if (l >= NumLanes / 2)
3188 LaneMask += NumLanes;
3189 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3190 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3196 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3197 Name.starts_with(
"avx512.mask.broadcasti")) {
3200 unsigned NumDstElts =
3204 for (
unsigned i = 0; i != NumDstElts; ++i)
3205 ShuffleMask[i] = i % NumSrcElts;
3211 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3212 Name.starts_with(
"avx2.vbroadcast") ||
3213 Name.starts_with(
"avx512.pbroadcast") ||
3214 Name.starts_with(
"avx512.mask.broadcast.s")) {
3221 Rep = Builder.CreateShuffleVector(
Op, M);
3226 }
else if (Name.starts_with(
"sse2.padds.") ||
3227 Name.starts_with(
"avx2.padds.") ||
3228 Name.starts_with(
"avx512.padds.") ||
3229 Name.starts_with(
"avx512.mask.padds.")) {
3231 }
else if (Name.starts_with(
"sse2.psubs.") ||
3232 Name.starts_with(
"avx2.psubs.") ||
3233 Name.starts_with(
"avx512.psubs.") ||
3234 Name.starts_with(
"avx512.mask.psubs.")) {
3236 }
else if (Name.starts_with(
"sse2.paddus.") ||
3237 Name.starts_with(
"avx2.paddus.") ||
3238 Name.starts_with(
"avx512.mask.paddus.")) {
3240 }
else if (Name.starts_with(
"sse2.psubus.") ||
3241 Name.starts_with(
"avx2.psubus.") ||
3242 Name.starts_with(
"avx512.mask.psubus.")) {
3244 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3249 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3253 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3258 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3263 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3264 Name ==
"avx512.psll.dq.512") {
3268 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3269 Name ==
"avx512.psrl.dq.512") {
3273 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3274 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3275 Name.starts_with(
"avx2.pblendd.")) {
3280 unsigned NumElts = VecTy->getNumElements();
3283 for (
unsigned i = 0; i != NumElts; ++i)
3284 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3286 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3287 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3288 Name ==
"avx2.vinserti128" ||
3289 Name.starts_with(
"avx512.mask.insert")) {
3293 unsigned DstNumElts =
3295 unsigned SrcNumElts =
3297 unsigned Scale = DstNumElts / SrcNumElts;
3304 for (
unsigned i = 0; i != SrcNumElts; ++i)
3306 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3307 Idxs[i] = SrcNumElts;
3308 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3322 for (
unsigned i = 0; i != DstNumElts; ++i)
3325 for (
unsigned i = 0; i != SrcNumElts; ++i)
3326 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3327 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3333 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3334 Name ==
"avx2.vextracti128" ||
3335 Name.starts_with(
"avx512.mask.vextract")) {
3338 unsigned DstNumElts =
3340 unsigned SrcNumElts =
3342 unsigned Scale = SrcNumElts / DstNumElts;
3349 for (
unsigned i = 0; i != DstNumElts; ++i) {
3350 Idxs[i] = i + (Imm * DstNumElts);
3352 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3358 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3359 Name.starts_with(
"avx512.mask.perm.di.")) {
3363 unsigned NumElts = VecTy->getNumElements();
3366 for (
unsigned i = 0; i != NumElts; ++i)
3367 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3369 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3374 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3386 unsigned HalfSize = NumElts / 2;
3398 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3399 for (
unsigned i = 0; i < HalfSize; ++i)
3400 ShuffleMask[i] = StartIndex + i;
3403 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3404 for (
unsigned i = 0; i < HalfSize; ++i)
3405 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3407 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3409 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3410 Name.starts_with(
"avx512.mask.vpermil.p") ||
3411 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3415 unsigned NumElts = VecTy->getNumElements();
3417 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3418 unsigned IdxMask = ((1 << IdxSize) - 1);
3424 for (
unsigned i = 0; i != NumElts; ++i)
3425 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3432 }
else if (Name ==
"sse2.pshufl.w" ||
3433 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3439 for (
unsigned l = 0; l != NumElts; l += 8) {
3440 for (
unsigned i = 0; i != 4; ++i)
3441 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3442 for (
unsigned i = 4; i != 8; ++i)
3443 Idxs[i + l] = i + l;
3446 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3451 }
else if (Name ==
"sse2.pshufh.w" ||
3452 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3458 for (
unsigned l = 0; l != NumElts; l += 8) {
3459 for (
unsigned i = 0; i != 4; ++i)
3460 Idxs[i + l] = i + l;
3461 for (
unsigned i = 0; i != 4; ++i)
3462 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3465 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3470 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3477 unsigned HalfLaneElts = NumLaneElts / 2;
3480 for (
unsigned i = 0; i != NumElts; ++i) {
3482 Idxs[i] = i - (i % NumLaneElts);
3484 if ((i % NumLaneElts) >= HalfLaneElts)
3488 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3491 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3495 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3496 Name.starts_with(
"avx512.mask.movshdup") ||
3497 Name.starts_with(
"avx512.mask.movsldup")) {
3503 if (Name.starts_with(
"avx512.mask.movshdup."))
3507 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3508 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3509 Idxs[i + l + 0] = i + l +
Offset;
3510 Idxs[i + l + 1] = i + l +
Offset;
3513 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3517 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3518 Name.starts_with(
"avx512.mask.unpckl.")) {
3525 for (
int l = 0; l != NumElts; l += NumLaneElts)
3526 for (
int i = 0; i != NumLaneElts; ++i)
3527 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3529 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3533 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3534 Name.starts_with(
"avx512.mask.unpckh.")) {
3541 for (
int l = 0; l != NumElts; l += NumLaneElts)
3542 for (
int i = 0; i != NumLaneElts; ++i)
3543 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3545 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3549 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3550 Name.starts_with(
"avx512.mask.pand.")) {
3553 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3555 Rep = Builder.CreateBitCast(Rep, FTy);
3558 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3559 Name.starts_with(
"avx512.mask.pandn.")) {
3562 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3563 Rep = Builder.CreateAnd(Rep,
3565 Rep = Builder.CreateBitCast(Rep, FTy);
3568 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3569 Name.starts_with(
"avx512.mask.por.")) {
3572 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3574 Rep = Builder.CreateBitCast(Rep, FTy);
3577 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3578 Name.starts_with(
"avx512.mask.pxor.")) {
3581 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3583 Rep = Builder.CreateBitCast(Rep, FTy);
3586 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3590 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3594 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3598 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3599 if (Name.ends_with(
".512")) {
3601 if (Name[17] ==
's')
3602 IID = Intrinsic::x86_avx512_add_ps_512;
3604 IID = Intrinsic::x86_avx512_add_pd_512;
3606 Rep = Builder.CreateIntrinsic(
3614 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3615 if (Name.ends_with(
".512")) {
3617 if (Name[17] ==
's')
3618 IID = Intrinsic::x86_avx512_div_ps_512;
3620 IID = Intrinsic::x86_avx512_div_pd_512;
3622 Rep = Builder.CreateIntrinsic(
3630 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3631 if (Name.ends_with(
".512")) {
3633 if (Name[17] ==
's')
3634 IID = Intrinsic::x86_avx512_mul_ps_512;
3636 IID = Intrinsic::x86_avx512_mul_pd_512;
3638 Rep = Builder.CreateIntrinsic(
3646 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3647 if (Name.ends_with(
".512")) {
3649 if (Name[17] ==
's')
3650 IID = Intrinsic::x86_avx512_sub_ps_512;
3652 IID = Intrinsic::x86_avx512_sub_pd_512;
3654 Rep = Builder.CreateIntrinsic(
3662 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3663 Name.starts_with(
"avx512.mask.min.p")) &&
3664 Name.drop_front(18) ==
".512") {
3665 bool IsDouble = Name[17] ==
'd';
3666 bool IsMin = Name[13] ==
'i';
3668 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3669 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3672 Rep = Builder.CreateIntrinsic(
3677 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3679 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3680 {CI->getArgOperand(0), Builder.getInt1(false)});
3683 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3684 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3685 bool IsVariable = Name[16] ==
'v';
3686 char Size = Name[16] ==
'.' ? Name[17]
3687 : Name[17] ==
'.' ? Name[18]
3688 : Name[18] ==
'.' ? Name[19]
3692 if (IsVariable && Name[17] !=
'.') {
3693 if (
Size ==
'd' && Name[17] ==
'2')
3694 IID = Intrinsic::x86_avx2_psllv_q;
3695 else if (
Size ==
'd' && Name[17] ==
'4')
3696 IID = Intrinsic::x86_avx2_psllv_q_256;
3697 else if (
Size ==
's' && Name[17] ==
'4')
3698 IID = Intrinsic::x86_avx2_psllv_d;
3699 else if (
Size ==
's' && Name[17] ==
'8')
3700 IID = Intrinsic::x86_avx2_psllv_d_256;
3701 else if (
Size ==
'h' && Name[17] ==
'8')
3702 IID = Intrinsic::x86_avx512_psllv_w_128;
3703 else if (
Size ==
'h' && Name[17] ==
'1')
3704 IID = Intrinsic::x86_avx512_psllv_w_256;
3705 else if (Name[17] ==
'3' && Name[18] ==
'2')
3706 IID = Intrinsic::x86_avx512_psllv_w_512;
3709 }
else if (Name.ends_with(
".128")) {
3711 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3712 : Intrinsic::x86_sse2_psll_d;
3713 else if (
Size ==
'q')
3714 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3715 : Intrinsic::x86_sse2_psll_q;
3716 else if (
Size ==
'w')
3717 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3718 : Intrinsic::x86_sse2_psll_w;
3721 }
else if (Name.ends_with(
".256")) {
3723 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3724 : Intrinsic::x86_avx2_psll_d;
3725 else if (
Size ==
'q')
3726 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3727 : Intrinsic::x86_avx2_psll_q;
3728 else if (
Size ==
'w')
3729 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3730 : Intrinsic::x86_avx2_psll_w;
3735 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3736 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3737 : Intrinsic::x86_avx512_psll_d_512;
3738 else if (
Size ==
'q')
3739 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3740 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3741 : Intrinsic::x86_avx512_psll_q_512;
3742 else if (
Size ==
'w')
3743 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3744 : Intrinsic::x86_avx512_psll_w_512;
3750 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3751 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3752 bool IsVariable = Name[16] ==
'v';
3753 char Size = Name[16] ==
'.' ? Name[17]
3754 : Name[17] ==
'.' ? Name[18]
3755 : Name[18] ==
'.' ? Name[19]
3759 if (IsVariable && Name[17] !=
'.') {
3760 if (
Size ==
'd' && Name[17] ==
'2')
3761 IID = Intrinsic::x86_avx2_psrlv_q;
3762 else if (
Size ==
'd' && Name[17] ==
'4')
3763 IID = Intrinsic::x86_avx2_psrlv_q_256;
3764 else if (
Size ==
's' && Name[17] ==
'4')
3765 IID = Intrinsic::x86_avx2_psrlv_d;
3766 else if (
Size ==
's' && Name[17] ==
'8')
3767 IID = Intrinsic::x86_avx2_psrlv_d_256;
3768 else if (
Size ==
'h' && Name[17] ==
'8')
3769 IID = Intrinsic::x86_avx512_psrlv_w_128;
3770 else if (
Size ==
'h' && Name[17] ==
'1')
3771 IID = Intrinsic::x86_avx512_psrlv_w_256;
3772 else if (Name[17] ==
'3' && Name[18] ==
'2')
3773 IID = Intrinsic::x86_avx512_psrlv_w_512;
3776 }
else if (Name.ends_with(
".128")) {
3778 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3779 : Intrinsic::x86_sse2_psrl_d;
3780 else if (
Size ==
'q')
3781 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3782 : Intrinsic::x86_sse2_psrl_q;
3783 else if (
Size ==
'w')
3784 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3785 : Intrinsic::x86_sse2_psrl_w;
3788 }
else if (Name.ends_with(
".256")) {
3790 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3791 : Intrinsic::x86_avx2_psrl_d;
3792 else if (
Size ==
'q')
3793 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3794 : Intrinsic::x86_avx2_psrl_q;
3795 else if (
Size ==
'w')
3796 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3797 : Intrinsic::x86_avx2_psrl_w;
3802 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3803 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3804 : Intrinsic::x86_avx512_psrl_d_512;
3805 else if (
Size ==
'q')
3806 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3807 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3808 : Intrinsic::x86_avx512_psrl_q_512;
3809 else if (
Size ==
'w')
3810 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3811 : Intrinsic::x86_avx512_psrl_w_512;
3817 }
else if (Name.starts_with(
"avx512.mask.psra")) {
3818 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3819 bool IsVariable = Name[16] ==
'v';
3820 char Size = Name[16] ==
'.' ? Name[17]
3821 : Name[17] ==
'.' ? Name[18]
3822 : Name[18] ==
'.' ? Name[19]
3826 if (IsVariable && Name[17] !=
'.') {
3827 if (
Size ==
's' && Name[17] ==
'4')
3828 IID = Intrinsic::x86_avx2_psrav_d;
3829 else if (
Size ==
's' && Name[17] ==
'8')
3830 IID = Intrinsic::x86_avx2_psrav_d_256;
3831 else if (
Size ==
'h' && Name[17] ==
'8')
3832 IID = Intrinsic::x86_avx512_psrav_w_128;
3833 else if (
Size ==
'h' && Name[17] ==
'1')
3834 IID = Intrinsic::x86_avx512_psrav_w_256;
3835 else if (Name[17] ==
'3' && Name[18] ==
'2')
3836 IID = Intrinsic::x86_avx512_psrav_w_512;
3839 }
else if (Name.ends_with(
".128")) {
3841 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3842 : Intrinsic::x86_sse2_psra_d;
3843 else if (
Size ==
'q')
3844 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3845 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3846 : Intrinsic::x86_avx512_psra_q_128;
3847 else if (
Size ==
'w')
3848 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3849 : Intrinsic::x86_sse2_psra_w;
3852 }
else if (Name.ends_with(
".256")) {
3854 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3855 : Intrinsic::x86_avx2_psra_d;
3856 else if (
Size ==
'q')
3857 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3858 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3859 : Intrinsic::x86_avx512_psra_q_256;
3860 else if (
Size ==
'w')
3861 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3862 : Intrinsic::x86_avx2_psra_w;
3867 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3868 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3869 : Intrinsic::x86_avx512_psra_d_512;
3870 else if (
Size ==
'q')
3871 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3872 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3873 : Intrinsic::x86_avx512_psra_q_512;
3874 else if (
Size ==
'w')
3875 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3876 : Intrinsic::x86_avx512_psra_w_512;
3882 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
3884 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
3886 }
else if (Name.ends_with(
".movntdqa")) {
3890 LoadInst *LI = Builder.CreateAlignedLoad(
3895 }
else if (Name.starts_with(
"fma.vfmadd.") ||
3896 Name.starts_with(
"fma.vfmsub.") ||
3897 Name.starts_with(
"fma.vfnmadd.") ||
3898 Name.starts_with(
"fma.vfnmsub.")) {
3899 bool NegMul = Name[6] ==
'n';
3900 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
3901 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
3912 if (NegMul && !IsScalar)
3913 Ops[0] = Builder.CreateFNeg(
Ops[0]);
3914 if (NegMul && IsScalar)
3915 Ops[1] = Builder.CreateFNeg(
Ops[1]);
3917 Ops[2] = Builder.CreateFNeg(
Ops[2]);
3919 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
3923 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
3931 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
3935 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
3936 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
3937 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
3938 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
3939 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
3940 bool IsMask3 = Name[11] ==
'3';
3941 bool IsMaskZ = Name[11] ==
'z';
3943 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3944 bool NegMul = Name[2] ==
'n';
3945 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
3951 if (NegMul && (IsMask3 || IsMaskZ))
3952 A = Builder.CreateFNeg(
A);
3953 if (NegMul && !(IsMask3 || IsMaskZ))
3954 B = Builder.CreateFNeg(
B);
3956 C = Builder.CreateFNeg(
C);
3958 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
3959 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
3960 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
3967 if (Name.back() ==
'd')
3968 IID = Intrinsic::x86_avx512_vfmadd_f64;
3970 IID = Intrinsic::x86_avx512_vfmadd_f32;
3971 Rep = Builder.CreateIntrinsic(IID,
Ops);
3973 Rep = Builder.CreateFMA(
A,
B,
C);
3982 if (NegAcc && IsMask3)
3987 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
3989 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
3990 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
3991 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
3992 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
3993 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
3994 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
3995 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
3996 bool IsMask3 = Name[11] ==
'3';
3997 bool IsMaskZ = Name[11] ==
'z';
3999 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4000 bool NegMul = Name[2] ==
'n';
4001 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4007 if (NegMul && (IsMask3 || IsMaskZ))
4008 A = Builder.CreateFNeg(
A);
4009 if (NegMul && !(IsMask3 || IsMaskZ))
4010 B = Builder.CreateFNeg(
B);
4012 C = Builder.CreateFNeg(
C);
4019 if (Name[Name.size() - 5] ==
's')
4020 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4022 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4026 Rep = Builder.CreateFMA(
A,
B,
C);
4034 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4038 if (VecWidth == 128 && EltWidth == 32)
4039 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4040 else if (VecWidth == 256 && EltWidth == 32)
4041 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4042 else if (VecWidth == 128 && EltWidth == 64)
4043 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4044 else if (VecWidth == 256 && EltWidth == 64)
4045 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4051 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4052 Rep = Builder.CreateIntrinsic(IID,
Ops);
4053 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4054 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4055 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4056 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4057 bool IsMask3 = Name[11] ==
'3';
4058 bool IsMaskZ = Name[11] ==
'z';
4060 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4061 bool IsSubAdd = Name[3] ==
's';
4065 if (Name[Name.size() - 5] ==
's')
4066 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4068 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4073 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4075 Rep = Builder.CreateIntrinsic(IID,
Ops);
4084 Value *Odd = Builder.CreateCall(FMA,
Ops);
4085 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4086 Value *Even = Builder.CreateCall(FMA,
Ops);
4092 for (
int i = 0; i != NumElts; ++i)
4093 Idxs[i] = i + (i % 2) * NumElts;
4095 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4103 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4104 Name.starts_with(
"avx512.maskz.pternlog.")) {
4105 bool ZeroMask = Name[11] ==
'z';
4109 if (VecWidth == 128 && EltWidth == 32)
4110 IID = Intrinsic::x86_avx512_pternlog_d_128;
4111 else if (VecWidth == 256 && EltWidth == 32)
4112 IID = Intrinsic::x86_avx512_pternlog_d_256;
4113 else if (VecWidth == 512 && EltWidth == 32)
4114 IID = Intrinsic::x86_avx512_pternlog_d_512;
4115 else if (VecWidth == 128 && EltWidth == 64)
4116 IID = Intrinsic::x86_avx512_pternlog_q_128;
4117 else if (VecWidth == 256 && EltWidth == 64)
4118 IID = Intrinsic::x86_avx512_pternlog_q_256;
4119 else if (VecWidth == 512 && EltWidth == 64)
4120 IID = Intrinsic::x86_avx512_pternlog_q_512;
4126 Rep = Builder.CreateIntrinsic(IID, Args);
4130 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4131 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4132 bool ZeroMask = Name[11] ==
'z';
4133 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4136 if (VecWidth == 128 && !
High)
4137 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4138 else if (VecWidth == 256 && !
High)
4139 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4140 else if (VecWidth == 512 && !
High)
4141 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4142 else if (VecWidth == 128 &&
High)
4143 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4144 else if (VecWidth == 256 &&
High)
4145 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4146 else if (VecWidth == 512 &&
High)
4147 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4153 Rep = Builder.CreateIntrinsic(IID, Args);
4157 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4158 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4159 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4160 bool ZeroMask = Name[11] ==
'z';
4161 bool IndexForm = Name[17] ==
'i';
4163 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4164 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4165 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4166 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4167 bool ZeroMask = Name[11] ==
'z';
4168 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4171 if (VecWidth == 128 && !IsSaturating)
4172 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4173 else if (VecWidth == 256 && !IsSaturating)
4174 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4175 else if (VecWidth == 512 && !IsSaturating)
4176 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4177 else if (VecWidth == 128 && IsSaturating)
4178 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4179 else if (VecWidth == 256 && IsSaturating)
4180 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4181 else if (VecWidth == 512 && IsSaturating)
4182 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4192 if (Args[1]->
getType()->isVectorTy() &&
4195 ->isIntegerTy(32) &&
4196 Args[2]->
getType()->isVectorTy() &&
4199 ->isIntegerTy(32)) {
4200 Type *NewArgType =
nullptr;
4201 if (VecWidth == 128)
4203 else if (VecWidth == 256)
4205 else if (VecWidth == 512)
4210 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4211 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4214 Rep = Builder.CreateIntrinsic(IID, Args);
4218 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4219 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4220 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4221 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4222 bool ZeroMask = Name[11] ==
'z';
4223 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4226 if (VecWidth == 128 && !IsSaturating)
4227 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4228 else if (VecWidth == 256 && !IsSaturating)
4229 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4230 else if (VecWidth == 512 && !IsSaturating)
4231 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4232 else if (VecWidth == 128 && IsSaturating)
4233 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4234 else if (VecWidth == 256 && IsSaturating)
4235 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4236 else if (VecWidth == 512 && IsSaturating)
4237 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4243 Rep = Builder.CreateIntrinsic(IID, Args);
4247 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4248 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4249 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4251 if (Name[0] ==
'a' && Name.back() ==
'2')
4252 IID = Intrinsic::x86_addcarry_32;
4253 else if (Name[0] ==
'a' && Name.back() ==
'4')
4254 IID = Intrinsic::x86_addcarry_64;
4255 else if (Name[0] ==
's' && Name.back() ==
'2')
4256 IID = Intrinsic::x86_subborrow_32;
4257 else if (Name[0] ==
's' && Name.back() ==
'4')
4258 IID = Intrinsic::x86_subborrow_64;
4265 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4268 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4271 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4275 }
else if (Name.starts_with(
"avx512.mask.") &&
4285 if (Name.starts_with(
"neon.bfcvt")) {
4286 if (Name.starts_with(
"neon.bfcvtn2")) {
4288 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4290 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4291 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4294 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4295 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4297 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4301 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4302 return Builder.CreateShuffleVector(
4305 return Builder.CreateFPTrunc(CI->
getOperand(0),
4308 }
else if (Name.starts_with(
"sve.fcvt")) {
4311 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4312 .
Case(
"sve.fcvtnt.bf16f32",
4313 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4325 if (Args[1]->
getType() != BadPredTy)
4328 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4329 BadPredTy, Args[1]);
4330 Args[1] = Builder.CreateIntrinsic(
4331 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4333 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4342 if (Name ==
"mve.vctp64.old") {
4345 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4348 Value *C1 = Builder.CreateIntrinsic(
4349 Intrinsic::arm_mve_pred_v2i,
4351 return Builder.CreateIntrinsic(
4352 Intrinsic::arm_mve_pred_i2v,
4354 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4355 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4356 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4357 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4359 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4360 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4361 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4362 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4364 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4365 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4366 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4367 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4368 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4369 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4370 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4371 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4372 std::vector<Type *> Tys;
4376 case Intrinsic::arm_mve_mull_int_predicated:
4377 case Intrinsic::arm_mve_vqdmull_predicated:
4378 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4381 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4382 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4383 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4387 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4391 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4395 case Intrinsic::arm_cde_vcx1q_predicated:
4396 case Intrinsic::arm_cde_vcx1qa_predicated:
4397 case Intrinsic::arm_cde_vcx2q_predicated:
4398 case Intrinsic::arm_cde_vcx2qa_predicated:
4399 case Intrinsic::arm_cde_vcx3q_predicated:
4400 case Intrinsic::arm_cde_vcx3qa_predicated:
4407 std::vector<Value *>
Ops;
4409 Type *Ty =
Op->getType();
4410 if (Ty->getScalarSizeInBits() == 1) {
4411 Value *C1 = Builder.CreateIntrinsic(
4412 Intrinsic::arm_mve_pred_v2i,
4414 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4419 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4447 if (NumOperands < 3)
4460 bool IsVolatile =
false;
4464 if (NumOperands > 3)
4469 if (NumOperands > 5) {
4471 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4485 if (VT->getElementType()->isIntegerTy(16)) {
4488 Val = Builder.CreateBitCast(Val, AsBF16);
4496 Builder.CreateAtomicRMW(RMWOp,
Ptr, Val, std::nullopt, Order, SSID);
4498 unsigned AddrSpace = PtrTy->getAddressSpace();
4501 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4503 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4508 MDNode *RangeNotPrivate =
4511 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4517 return Builder.CreateBitCast(RMW, RetTy);
4538 return MAV->getMetadata();
4545 return I->getDebugLoc().getAsMDNode();
4553 if (Name ==
"label") {
4556 }
else if (Name ==
"assign") {
4563 }
else if (Name ==
"declare") {
4568 }
else if (Name ==
"addr") {
4578 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4580 }
else if (Name ==
"value") {
4583 unsigned ExprOp = 2;
4597 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4619 assert(Name.starts_with(
"llvm.") &&
"Intrinsic doesn't start with 'llvm.'");
4620 Name = Name.substr(5);
4622 bool IsX86 = Name.consume_front(
"x86.");
4623 bool IsNVVM = Name.consume_front(
"nvvm.");
4624 bool IsAArch64 = Name.consume_front(
"aarch64.");
4625 bool IsARM = Name.consume_front(
"arm.");
4626 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
4627 bool IsDbg = Name.consume_front(
"dbg.");
4628 Value *Rep =
nullptr;
4630 if (!IsX86 && Name ==
"stackprotectorcheck") {
4632 }
else if (IsNVVM) {
4636 }
else if (IsAArch64) {
4640 }
else if (IsAMDGCN) {
4654 const auto &DefaultCase = [&]() ->
void {
4662 "Unknown function for CallBase upgrade and isn't just a name change");
4670 "Return type must have changed");
4671 assert(OldST->getNumElements() ==
4673 "Must have same number of elements");
4676 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4679 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4680 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4681 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4700 case Intrinsic::arm_neon_vst1:
4701 case Intrinsic::arm_neon_vst2:
4702 case Intrinsic::arm_neon_vst3:
4703 case Intrinsic::arm_neon_vst4:
4704 case Intrinsic::arm_neon_vst2lane:
4705 case Intrinsic::arm_neon_vst3lane:
4706 case Intrinsic::arm_neon_vst4lane: {
4708 NewCall = Builder.CreateCall(NewFn, Args);
4711 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4712 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4713 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4718 NewCall = Builder.CreateCall(NewFn, Args);
4721 case Intrinsic::aarch64_sve_ld3_sret:
4722 case Intrinsic::aarch64_sve_ld4_sret:
4723 case Intrinsic::aarch64_sve_ld2_sret: {
4725 Name = Name.substr(5);
4732 unsigned MinElts = RetTy->getMinNumElements() /
N;
4734 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4736 for (
unsigned I = 0;
I <
N;
I++) {
4737 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
4738 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
4744 case Intrinsic::coro_end: {
4747 NewCall = Builder.CreateCall(NewFn, Args);
4751 case Intrinsic::vector_extract: {
4753 Name = Name.substr(5);
4754 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
4759 unsigned MinElts = RetTy->getMinNumElements();
4762 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
4766 case Intrinsic::vector_insert: {
4768 Name = Name.substr(5);
4769 if (!Name.starts_with(
"aarch64.sve.tuple")) {
4773 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
4778 NewCall = Builder.CreateCall(
4782 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
4788 assert(
N > 1 &&
"Create is expected to be between 2-4");
4791 unsigned MinElts = RetTy->getMinNumElements() /
N;
4792 for (
unsigned I = 0;
I <
N;
I++) {
4794 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
4801 case Intrinsic::arm_neon_bfdot:
4802 case Intrinsic::arm_neon_bfmmla:
4803 case Intrinsic::arm_neon_bfmlalb:
4804 case Intrinsic::arm_neon_bfmlalt:
4805 case Intrinsic::aarch64_neon_bfdot:
4806 case Intrinsic::aarch64_neon_bfmmla:
4807 case Intrinsic::aarch64_neon_bfmlalb:
4808 case Intrinsic::aarch64_neon_bfmlalt: {
4811 "Mismatch between function args and call args");
4812 size_t OperandWidth =
4814 assert((OperandWidth == 64 || OperandWidth == 128) &&
4815 "Unexpected operand width");
4817 auto Iter = CI->
args().begin();
4818 Args.push_back(*Iter++);
4819 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4820 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4821 NewCall = Builder.CreateCall(NewFn, Args);
4825 case Intrinsic::bitreverse:
4826 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
4829 case Intrinsic::ctlz:
4830 case Intrinsic::cttz:
4832 "Mismatch between function args and call args");
4834 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
4837 case Intrinsic::objectsize: {
4838 Value *NullIsUnknownSize =
4842 NewCall = Builder.CreateCall(
4847 case Intrinsic::ctpop:
4848 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
4851 case Intrinsic::convert_from_fp16:
4852 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
4855 case Intrinsic::dbg_value: {
4857 Name = Name.substr(5);
4859 if (Name.starts_with(
"dbg.addr")) {
4873 if (
Offset->isZeroValue()) {
4874 NewCall = Builder.CreateCall(
4883 case Intrinsic::ptr_annotation:
4891 NewCall = Builder.CreateCall(
4900 case Intrinsic::var_annotation:
4907 NewCall = Builder.CreateCall(
4916 case Intrinsic::riscv_aes32dsi:
4917 case Intrinsic::riscv_aes32dsmi:
4918 case Intrinsic::riscv_aes32esi:
4919 case Intrinsic::riscv_aes32esmi:
4920 case Intrinsic::riscv_sm4ks:
4921 case Intrinsic::riscv_sm4ed: {
4931 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4932 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4938 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4939 Value *Res = NewCall;
4941 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
4947 case Intrinsic::nvvm_mapa_shared_cluster: {
4951 Value *Res = NewCall;
4952 Res = Builder.CreateAddrSpaceCast(
4959 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4960 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4963 Args[0] = Builder.CreateAddrSpaceCast(
4966 NewCall = Builder.CreateCall(NewFn, Args);
4972 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4973 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4974 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4975 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4976 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4977 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4978 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4979 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4986 Args[0] = Builder.CreateAddrSpaceCast(
4995 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
4997 NewCall = Builder.CreateCall(NewFn, Args);
5003 case Intrinsic::riscv_sha256sig0:
5004 case Intrinsic::riscv_sha256sig1:
5005 case Intrinsic::riscv_sha256sum0:
5006 case Intrinsic::riscv_sha256sum1:
5007 case Intrinsic::riscv_sm3p0:
5008 case Intrinsic::riscv_sm3p1: {
5015 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5017 NewCall = Builder.CreateCall(NewFn, Arg);
5019 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5026 case Intrinsic::x86_xop_vfrcz_ss:
5027 case Intrinsic::x86_xop_vfrcz_sd:
5028 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5031 case Intrinsic::x86_xop_vpermil2pd:
5032 case Intrinsic::x86_xop_vpermil2ps:
5033 case Intrinsic::x86_xop_vpermil2pd_256:
5034 case Intrinsic::x86_xop_vpermil2ps_256: {
5038 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5039 NewCall = Builder.CreateCall(NewFn, Args);
5043 case Intrinsic::x86_sse41_ptestc:
5044 case Intrinsic::x86_sse41_ptestz:
5045 case Intrinsic::x86_sse41_ptestnzc: {
5059 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5060 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5062 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5066 case Intrinsic::x86_rdtscp: {
5072 NewCall = Builder.CreateCall(NewFn);
5074 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5077 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5085 case Intrinsic::x86_sse41_insertps:
5086 case Intrinsic::x86_sse41_dppd:
5087 case Intrinsic::x86_sse41_dpps:
5088 case Intrinsic::x86_sse41_mpsadbw:
5089 case Intrinsic::x86_avx_dp_ps_256:
5090 case Intrinsic::x86_avx2_mpsadbw: {
5096 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5097 NewCall = Builder.CreateCall(NewFn, Args);
5101 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5102 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5103 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5104 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5105 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5106 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5112 NewCall = Builder.CreateCall(NewFn, Args);
5121 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5122 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5123 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5124 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5125 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5126 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5130 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5131 Args[1] = Builder.CreateBitCast(
5134 NewCall = Builder.CreateCall(NewFn, Args);
5135 Value *Res = Builder.CreateBitCast(
5143 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5144 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5145 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5149 Args[1] = Builder.CreateBitCast(
5151 Args[2] = Builder.CreateBitCast(
5154 NewCall = Builder.CreateCall(NewFn, Args);
5158 case Intrinsic::thread_pointer: {
5159 NewCall = Builder.CreateCall(NewFn, {});
5163 case Intrinsic::memcpy:
5164 case Intrinsic::memmove:
5165 case Intrinsic::memset: {
5181 NewCall = Builder.CreateCall(NewFn, Args);
5183 AttributeList NewAttrs = AttributeList::get(
5184 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5185 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5186 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5191 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5194 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5198 case Intrinsic::lifetime_start:
5199 case Intrinsic::lifetime_end: {
5207 Ptr =
Ptr->stripPointerCasts();
5211 NewCall = Builder.CreateLifetimeStart(
Ptr);
5213 NewCall = Builder.CreateLifetimeEnd(
Ptr);
5222 case Intrinsic::x86_avx512_vpdpbusd_128:
5223 case Intrinsic::x86_avx512_vpdpbusd_256:
5224 case Intrinsic::x86_avx512_vpdpbusd_512:
5225 case Intrinsic::x86_avx512_vpdpbusds_128:
5226 case Intrinsic::x86_avx512_vpdpbusds_256:
5227 case Intrinsic::x86_avx512_vpdpbusds_512: {
5232 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5233 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5235 NewCall = Builder.CreateCall(NewFn, Args);
5239 assert(NewCall &&
"Should have either set this variable or returned through "
5240 "the default case");
5247 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5261 F->eraseFromParent();
5267 if (NumOperands == 0)
5275 if (NumOperands == 3) {
5279 Metadata *Elts2[] = {ScalarType, ScalarType,
5293 if (
Opc != Instruction::BitCast)
5297 Type *SrcTy = V->getType();
5314 if (
Opc != Instruction::BitCast)
5317 Type *SrcTy =
C->getType();
5344 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5345 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5346 if (Flag->getNumOperands() < 3)
5348 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5349 return K->getString() ==
"Debug Info Version";
5352 if (OpIt != ModFlags->op_end()) {
5353 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5360 bool BrokenDebugInfo =
false;
5363 if (!BrokenDebugInfo)
5369 M.getContext().diagnose(Diag);
5376 M.getContext().diagnose(DiagVersion);
5386 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5389 if (
F->hasFnAttribute(Attr)) {
5392 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5394 auto [Part, Rest] = S.
split(
',');
5400 const unsigned Dim = DimC -
'x';
5401 assert(Dim < 3 &&
"Unexpected dim char");
5411 F->addFnAttr(Attr, NewAttr);
5415 return S ==
"x" || S ==
"y" || S ==
"z";
5420 if (K ==
"kernel") {
5432 const unsigned Idx = (AlignIdxValuePair >> 16);
5433 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5438 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5443 if (K ==
"minctasm") {
5448 if (K ==
"maxnreg") {
5453 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5457 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5461 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5465 if (K ==
"grid_constant") {
5480 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5487 if (!SeenNodes.
insert(MD).second)
5494 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5501 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5503 const MDOperand &V = MD->getOperand(j + 1);
5506 NewOperands.
append({K, V});
5509 if (NewOperands.
size() > 1)
5522 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
5523 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5524 if (ModRetainReleaseMarker) {
5530 ID->getString().split(ValueComp,
"#");
5531 if (ValueComp.
size() == 2) {
5532 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
5536 M.eraseNamedMetadata(ModRetainReleaseMarker);
5547 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
5573 bool InvalidCast =
false;
5575 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
5588 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
5590 Args.push_back(Arg);
5597 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5602 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
5615 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5623 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5624 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5625 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5626 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5627 {
"objc_autoreleaseReturnValue",
5628 llvm::Intrinsic::objc_autoreleaseReturnValue},
5629 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5630 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5631 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5632 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5633 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5634 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5635 {
"objc_release", llvm::Intrinsic::objc_release},
5636 {
"objc_retain", llvm::Intrinsic::objc_retain},
5637 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5638 {
"objc_retainAutoreleaseReturnValue",
5639 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5640 {
"objc_retainAutoreleasedReturnValue",
5641 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5642 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5643 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5644 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5645 {
"objc_unsafeClaimAutoreleasedReturnValue",
5646 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5647 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5648 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5649 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5650 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5651 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5652 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5653 {
"objc_arc_annotation_topdown_bbstart",
5654 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5655 {
"objc_arc_annotation_topdown_bbend",
5656 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5657 {
"objc_arc_annotation_bottomup_bbstart",
5658 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5659 {
"objc_arc_annotation_bottomup_bbend",
5660 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5662 for (
auto &
I : RuntimeFuncs)
5663 UpgradeToIntrinsic(
I.first,
I.second);
5667 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5671 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
5672 bool HasSwiftVersionFlag =
false;
5673 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5680 if (
Op->getNumOperands() != 3)
5694 if (
ID->getString() ==
"Objective-C Image Info Version")
5696 if (
ID->getString() ==
"Objective-C Class Properties")
5697 HasClassProperties =
true;
5699 if (
ID->getString() ==
"PIC Level") {
5700 if (
auto *Behavior =
5702 uint64_t V = Behavior->getLimitedValue();
5708 if (
ID->getString() ==
"PIE Level")
5709 if (
auto *Behavior =
5716 if (
ID->getString() ==
"branch-target-enforcement" ||
5717 ID->getString().starts_with(
"sign-return-address")) {
5718 if (
auto *Behavior =
5724 Op->getOperand(1),
Op->getOperand(2)};
5734 if (
ID->getString() ==
"Objective-C Image Info Section") {
5737 Value->getString().split(ValueComp,
" ");
5738 if (ValueComp.
size() != 1) {
5739 std::string NewValue;
5740 for (
auto &S : ValueComp)
5741 NewValue += S.str();
5752 if (
ID->getString() ==
"Objective-C Garbage Collection") {
5755 assert(Md->getValue() &&
"Expected non-empty metadata");
5756 auto Type = Md->getValue()->getType();
5759 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5760 if ((Val & 0xff) != Val) {
5761 HasSwiftVersionFlag =
true;
5762 SwiftABIVersion = (Val & 0xff00) >> 8;
5763 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5764 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5775 if (
ID->getString() ==
"amdgpu_code_object_version") {
5778 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
5790 if (HasObjCFlag && !HasClassProperties) {
5796 if (HasSwiftVersionFlag) {
5800 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5802 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5810 auto TrimSpaces = [](
StringRef Section) -> std::string {
5812 Section.split(Components,
',');
5817 for (
auto Component : Components)
5818 OS <<
',' << Component.trim();
5823 for (
auto &GV : M.globals()) {
5824 if (!GV.hasSection())
5829 if (!Section.starts_with(
"__DATA, __objc_catlist"))
5834 GV.setSection(TrimSpaces(Section));
5850struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
5851 StrictFPUpgradeVisitor() =
default;
5854 if (!
Call.isStrictFP())
5860 Call.removeFnAttr(Attribute::StrictFP);
5861 Call.addFnAttr(Attribute::NoBuiltin);
5866struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5867 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5868 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
5870 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5885 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
5886 StrictFPUpgradeVisitor SFPV;
5891 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5892 F.getReturnType(),
F.getAttributes().getRetAttrs()));
5893 for (
auto &Arg :
F.args())
5895 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5899 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
5900 A.isValid() &&
A.isStringAttribute()) {
5901 F.setSection(
A.getValueAsString());
5902 F.removeFnAttr(
"implicit-section-name");
5909 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
5912 if (
A.getValueAsBool()) {
5913 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5919 F.removeFnAttr(
"amdgpu-unsafe-fp-atomics");
5928 if (
T->getNumOperands() < 1)
5933 return S->getString().starts_with(
"llvm.vectorizer.");
5937 StringRef OldPrefix =
"llvm.vectorizer.";
5940 if (OldTag ==
"llvm.vectorizer.unroll")
5952 if (
T->getNumOperands() < 1)
5957 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
5962 Ops.reserve(
T->getNumOperands());
5964 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
5965 Ops.push_back(
T->getOperand(
I));
5979 Ops.reserve(
T->getNumOperands());
5990 if (((
T.isAMDGPU() && !
T.isAMDGCN()) ||
5991 (
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical()))) &&
5992 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
5993 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
5996 if (
T.isLoongArch64() ||
T.isRISCV64()) {
5998 auto I =
DL.find(
"-n64-");
6000 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6004 std::string Res =
DL.str();
6008 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6009 Res.append(Res.empty() ?
"G1" :
"-G1");
6014 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6015 Res.append(
"-ni:7:8:9");
6017 if (
DL.ends_with(
"ni:7"))
6019 if (
DL.ends_with(
"ni:7:8"))
6024 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6025 Res.append(
"-p7:160:256:256:32");
6026 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6027 Res.append(
"-p8:128:128:128:48");
6028 constexpr StringRef OldP8(
"-p8:128:128-");
6029 if (
DL.contains(OldP8))
6030 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6031 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6032 Res.append(
"-p9:192:256:256:32");
6037 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6040 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6041 if (!
DL.contains(AddrSpaces)) {
6043 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6044 if (R.match(Res, &
Groups))
6050 if (
T.isAArch64()) {
6052 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6053 Res.append(
"-Fn32");
6054 AddPtr32Ptr64AddrSpaces();
6058 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6062 std::string I64 =
"-i64:64";
6063 std::string I128 =
"-i128:128";
6065 size_t Pos = Res.find(I64);
6066 if (Pos !=
size_t(-1))
6067 Res.insert(Pos + I64.size(), I128);
6075 AddPtr32Ptr64AddrSpaces();
6083 if (!
T.isOSIAMCU()) {
6084 std::string I128 =
"-i128:128";
6087 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6088 if (R.match(Res, &
Groups))
6096 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6098 auto I =
Ref.find(
"-f80:32-");
6100 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6108 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6111 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6112 B.removeAttribute(
"no-frame-pointer-elim");
6114 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6116 if (FramePointer !=
"all")
6117 FramePointer =
"non-leaf";
6118 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6120 if (!FramePointer.
empty())
6121 B.addAttribute(
"frame-pointer", FramePointer);
6123 A =
B.getAttribute(
"null-pointer-is-valid");
6126 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6127 B.removeAttribute(
"null-pointer-is-valid");
6128 if (NullPointerIsValid)
6129 B.addAttribute(Attribute::NullPointerIsValid);
6139 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
static constexpr size_t npos
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.