63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
179 if ((ClassID ==
GPRClass && ST->hasEGPR()) ||
195 auto *VTy = dyn_cast<FixedVectorType>(Ty);
196 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
199 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
216 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
218 if (ST->
hasAVX() && PreferVectorWidth >= 256)
220 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
260 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
277 assert(ISD &&
"Invalid opcode");
279 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
280 (LT.second.getScalarType() == MVT::i32 ||
281 LT.second.getScalarType() == MVT::i64)) {
283 bool Op1Signed =
false, Op2Signed =
false;
286 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
287 bool SignedMode = Op1Signed || Op2Signed;
292 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
293 LT.second.getScalarType() == MVT::i32) {
295 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
297 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
298 bool Op1Sext = isa<SExtInst>(Args[0]) &&
299 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
300 bool Op2Sext = isa<SExtInst>(Args[1]) &&
301 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
303 bool IsZeroExtended = !Op1Signed || !Op2Signed;
304 bool IsConstant = Op1Constant || Op2Constant;
305 bool IsSext = Op1Sext || Op2Sext;
306 if (IsConstant || IsZeroExtended || IsSext)
314 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
317 if (!SignedMode && OpMinSize <= 8)
321 if (!SignedMode && OpMinSize <= 16)
328 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
381 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
382 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
383 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
384 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
385 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
386 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
387 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
388 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
389 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
393 if (
const auto *Entry =
395 if (
auto KindCost = Entry->Cost[
CostKind])
396 return LT.first * *KindCost;
399 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
400 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
401 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
402 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
403 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
404 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
405 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
406 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
407 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
409 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
410 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
411 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
412 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
413 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
414 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
418 if (
const auto *Entry =
420 if (
auto KindCost = Entry->Cost[
CostKind])
421 return LT.first * *KindCost;
424 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
425 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
426 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
428 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
429 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
430 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
432 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
433 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
434 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
435 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
436 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
437 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
439 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
440 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
441 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
442 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
443 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
444 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
445 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
454 if (
const auto *Entry =
456 if (
auto KindCost = Entry->Cost[
CostKind])
457 return LT.first * *KindCost;
460 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
461 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
462 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
463 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
464 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
465 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
467 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
477 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
481 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
482 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
483 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
484 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
485 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
486 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
495 if (
const auto *Entry =
497 if (
auto KindCost = Entry->Cost[
CostKind])
498 return LT.first * *KindCost;
501 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
502 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
503 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
504 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
505 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
506 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
508 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
518 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
522 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
523 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
524 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
525 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
526 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
527 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
537 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
538 if (
const auto *Entry =
540 if (
auto KindCost = Entry->Cost[
CostKind])
541 return LT.first * *KindCost;
544 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
545 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
546 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
548 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
549 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
550 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
552 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
553 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
554 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
556 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
557 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
558 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
568 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
569 if (
const auto *Entry =
571 if (
auto KindCost = Entry->Cost[
CostKind])
572 return LT.first * *KindCost;
587 if (
const auto *Entry =
589 if (
auto KindCost = Entry->Cost[
CostKind])
590 return LT.first * *KindCost;
610 if (
const auto *Entry =
612 if (
auto KindCost = Entry->Cost[
CostKind])
613 return LT.first * *KindCost;
633 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
634 if (
auto KindCost = Entry->Cost[
CostKind])
635 return LT.first * *KindCost;
655 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
656 if (
auto KindCost = Entry->Cost[
CostKind])
657 return LT.first * *KindCost;
665 if (
const auto *Entry =
667 if (
auto KindCost = Entry->Cost[
CostKind])
668 return LT.first * *KindCost;
688 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
689 if (
auto KindCost = Entry->Cost[
CostKind])
690 return LT.first * *KindCost;
693 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
694 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
695 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
696 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
697 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
698 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
699 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
700 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
701 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
703 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
704 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
705 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
709 if (
const auto *Entry =
711 if (
auto KindCost = Entry->Cost[
CostKind])
712 return LT.first * *KindCost;
715 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
716 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
717 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
719 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
720 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
721 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
723 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
724 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
725 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
726 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
727 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
728 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
729 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
733 if (
const auto *Entry =
735 if (
auto KindCost = Entry->Cost[
CostKind])
736 return LT.first * *KindCost;
740 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
741 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
742 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
743 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
744 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
745 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
747 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
757 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
758 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
759 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
761 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
762 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
763 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
764 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
765 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
766 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
770 if (
const auto *Entry =
772 if (
auto KindCost = Entry->Cost[
CostKind])
773 return LT.first * *KindCost;
776 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
777 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
778 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
779 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
780 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
781 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
783 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
793 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
794 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
795 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
797 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
798 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
799 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
800 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
801 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
802 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
807 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
808 if (
const auto *Entry =
810 if (
auto KindCost = Entry->Cost[
CostKind])
811 return LT.first * *KindCost;
815 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
816 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
817 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
819 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
820 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
821 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
823 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
824 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
825 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
827 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
828 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
829 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
833 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
834 if (
const auto *Entry =
836 if (
auto KindCost = Entry->Cost[
CostKind])
837 return LT.first * *KindCost;
840 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
841 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
842 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
847 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
848 if (
auto KindCost = Entry->Cost[
CostKind])
849 return LT.first * *KindCost;
852 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
853 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
854 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
855 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
856 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
857 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
858 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
859 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
860 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
862 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
863 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
864 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
865 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
866 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
867 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
868 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
869 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
870 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
872 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
873 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
875 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
876 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
877 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
878 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
880 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
881 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
883 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
884 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
885 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
886 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
888 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
889 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
890 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
891 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
896 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
897 if (
auto KindCost = Entry->Cost[
CostKind])
898 return LT.first * *KindCost;
901 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
902 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
903 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
905 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
906 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
907 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
909 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
910 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
911 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
919 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
920 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
921 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
922 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
923 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
924 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
925 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
926 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
927 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
929 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
930 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
932 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
933 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
935 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
936 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
937 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
938 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
940 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
941 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
942 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
943 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
945 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
946 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
947 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
948 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
950 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
951 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
952 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
953 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
958 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
959 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
960 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
961 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
962 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
963 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
964 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
965 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
968 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
969 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
970 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
971 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
973 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
974 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
975 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
976 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
977 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
978 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
979 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
980 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
983 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
984 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
985 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
986 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
990 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
991 if (
auto KindCost = Entry->Cost[
CostKind])
992 return LT.first * *KindCost;
997 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
998 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
999 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
1000 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
1001 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
1002 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
1003 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
1004 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1005 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
1006 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1018 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1019 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1026 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1027 if (
auto KindCost = Entry->Cost[
CostKind])
1028 return LT.first * *KindCost;
1033 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1034 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1035 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1036 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1037 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1038 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1040 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1041 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1042 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1043 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1044 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1046 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1047 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1048 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1049 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1050 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1051 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1052 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1053 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1054 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1055 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1056 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1057 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1067 if (
const auto *Entry =
1069 if (
auto KindCost = Entry->Cost[
CostKind])
1070 return LT.first * *KindCost;
1077 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1078 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1083 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1084 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1085 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1086 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1089 if (ST->useGLMDivSqrtCosts())
1090 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1091 if (
auto KindCost = Entry->Cost[
CostKind])
1092 return LT.first * *KindCost;
1095 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1096 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1097 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1098 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1099 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1100 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1101 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1102 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1103 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1104 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1105 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1106 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1112 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1114 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1115 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1118 if (ST->useSLMArithCosts())
1119 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1120 if (
auto KindCost = Entry->Cost[
CostKind])
1121 return LT.first * *KindCost;
1124 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1125 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1126 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1127 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1129 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1130 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1131 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1132 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1134 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1135 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1136 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1137 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1138 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1139 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1141 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1142 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1143 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1144 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1145 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1146 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1147 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1148 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1150 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1151 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1152 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1153 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1154 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1155 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1156 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1160 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1161 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1163 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1171 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1172 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1173 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1174 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1175 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1177 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1178 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1179 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1180 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1181 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1182 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1184 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1185 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1186 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1187 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1188 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1189 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1194 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1195 if (
auto KindCost = Entry->Cost[
CostKind])
1196 return LT.first * *KindCost;
1202 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1203 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1204 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1205 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1206 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1207 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1209 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1210 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1211 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1212 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1214 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1215 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1216 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1217 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1219 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1220 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1221 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1222 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1224 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1225 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1226 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1227 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1228 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1229 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1230 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1231 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1232 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1233 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1235 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1236 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1237 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1238 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1239 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1240 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1241 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1242 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1244 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1245 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1246 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1247 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1248 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1249 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1250 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1251 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1253 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1254 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1255 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1256 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1257 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1258 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1259 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1260 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1262 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1263 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1265 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1266 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1267 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1268 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1269 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1270 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1272 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1273 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1274 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1275 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1276 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1277 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1279 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1280 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1281 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1282 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1283 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1284 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1286 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1287 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1288 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1289 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1290 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1291 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1295 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1296 if (
auto KindCost = Entry->Cost[
CostKind])
1297 return LT.first * *KindCost;
1300 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1301 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1302 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1303 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1305 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1306 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1307 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1308 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1310 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1311 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1312 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1313 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1315 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1316 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1317 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1318 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1320 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1324 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1325 if (
auto KindCost = Entry->Cost[
CostKind])
1326 return LT.first * *KindCost;
1329 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1330 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1331 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1333 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1334 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1335 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1336 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1338 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1339 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1340 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1341 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1343 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1347 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1348 if (
auto KindCost = Entry->Cost[
CostKind])
1349 return LT.first * *KindCost;
1352 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1356 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1357 if (
auto KindCost = Entry->Cost[
CostKind])
1358 return LT.first * *KindCost;
1363 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1364 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1365 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1366 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1368 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1369 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1370 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1371 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1373 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1374 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1375 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1376 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1378 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1379 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1380 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1381 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1383 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1384 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1385 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1386 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1388 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1389 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1390 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1391 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1393 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1394 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1396 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1397 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1398 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1399 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1403 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1404 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1405 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1406 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1408 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1409 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1410 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1411 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1413 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1414 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1415 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1417 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1418 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1419 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1421 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1422 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1426 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1427 if (
auto KindCost = Entry->Cost[
CostKind])
1428 return LT.first * *KindCost;
1431 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1432 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1434 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1435 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1437 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1438 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1440 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1441 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1443 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1444 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1448 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1449 if (
auto KindCost = Entry->Cost[
CostKind])
1450 return LT.first * *KindCost;
1455 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1460 if (
auto KindCost = Entry->Cost[
CostKind])
1461 return LT.first * *KindCost;
1472 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1473 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1474 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1476 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1477 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1478 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1479 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1480 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1484 if (
auto KindCost = Entry->Cost[
CostKind])
1485 return LT.first * *KindCost;
1499 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1541 "Expected the Mask to match the return size if given");
1543 "Expected the same scalar types");
1552 if (!Args.empty() &&
1553 all_of(Args, [](
const Value *Arg) {
return isa<Constant>(Arg); }))
1576 using namespace PatternMatch;
1579 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1586 bool IsInLaneShuffle =
false;
1587 bool IsSingleElementMask =
false;
1593 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1594 if ((Mask.size() % NumLanes) == 0) {
1597 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1598 (
P.index() / NumEltsPerLane);
1600 IsSingleElementMask =
1601 (Mask.size() - 1) ==
static_cast<unsigned>(
count_if(Mask, [](
int M) {
1608 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1609 LT.second = LT.second.changeVectorElementType(MVT::f16);
1614 int NumElts = LT.second.getVectorNumElements();
1615 if ((Index % NumElts) == 0)
1618 if (SubLT.second.isVector()) {
1619 int NumSubElts = SubLT.second.getVectorNumElements();
1620 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1628 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1629 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1630 (NumSubElts % OrigSubElts) == 0 &&
1631 LT.second.getVectorElementType() ==
1632 SubLT.second.getVectorElementType() &&
1633 LT.second.getVectorElementType().getSizeInBits() ==
1635 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1636 "Unexpected number of elements!");
1638 LT.second.getVectorNumElements());
1640 SubLT.second.getVectorNumElements());
1641 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1644 ExtractIndex, SubTy);
1649 return ExtractCost + 1;
1652 "Unexpected vector size");
1654 return ExtractCost + 2;
1668 int NumElts = DstLT.second.getVectorNumElements();
1670 if (SubLT.second.isVector()) {
1671 int NumSubElts = SubLT.second.getVectorNumElements();
1672 bool MatchingTypes =
1673 NumElts == NumSubElts &&
1675 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1682 if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
1683 SubLT.second == MVT::f32 && (Index == 0 || ST->
hasSSE41()))
1728 if (
const auto *Entry =
1730 if (
auto KindCost = Entry->Cost[
CostKind])
1731 return LT.first * *KindCost;
1737 if (LT.first != 1) {
1738 MVT LegalVT = LT.second;
1743 cast<FixedVectorType>(SrcTy)->getNumElements()) {
1747 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1754 if (!Mask.empty() && NumOfDests.
isValid()) {
1772 unsigned E = NumOfDests.
getValue();
1773 unsigned NormalizedVF =
1779 unsigned PrevSrcReg = 0;
1783 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1784 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1789 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1790 PrevRegMask != RegMask)
1793 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
1799 if (SrcReg != DestReg &&
1804 PrevSrcReg = SrcReg;
1805 PrevRegMask = RegMask;
1811 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
1828 if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
1842 if (
const auto *Entry =
1844 if (
auto KindCost = Entry->Cost[
CostKind])
1845 return LT.first * *KindCost;
1879 if (
const auto *Entry =
1881 if (
auto KindCost = Entry->Cost[
CostKind])
1882 return LT.first * *KindCost;
1966 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1967 if (
auto KindCost = Entry->Cost[
CostKind])
1968 return LT.first * *KindCost;
1984 if (IsInLaneShuffle && ST->
hasAVX2())
1985 if (
const auto *Entry =
1987 if (
auto KindCost = Entry->Cost[
CostKind])
1988 return LT.first * *KindCost;
2038 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
2039 if (
auto KindCost = Entry->Cost[
CostKind])
2040 return LT.first * *KindCost;
2062 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2063 if (
auto KindCost = Entry->Cost[
CostKind])
2064 return LT.first * *KindCost;
2091 if (IsInLaneShuffle && ST->
hasAVX())
2092 if (
const auto *Entry =
2094 if (
auto KindCost = Entry->Cost[
CostKind])
2095 return LT.first * *KindCost;
2157 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2158 if (
auto KindCost = Entry->Cost[
CostKind])
2159 return LT.first * *KindCost;
2172 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2173 if (
auto KindCost = Entry->Cost[
CostKind])
2174 return LT.first * *KindCost;
2205 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2206 if (
auto KindCost = Entry->Cost[
CostKind])
2207 return LT.first * *KindCost;
2263 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2265 if (
const auto *Entry =
2268 LT.second.getVectorElementCount()) &&
2269 "Table entry missing from isLegalBroadcastLoad()");
2270 return LT.first * Entry->Cost;
2273 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2274 if (
auto KindCost = Entry->Cost[
CostKind])
2275 return LT.first * *KindCost;
2288 if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
2290 auto MatchSHUFPS = [](
int X,
int Y) {
2291 return X < 0 ||
Y < 0 || ((
X & 4) == (
Y & 4));
2293 if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
2296 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2297 if (
auto KindCost = Entry->Cost[
CostKind])
2298 return LT.first * *KindCost;
2311 assert(ISD &&
"Invalid opcode");
2438 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2460 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2461 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2808 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2885 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
3109 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3138 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3139 if (
auto KindCost = Entry->Cost[
CostKind])
3144 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3145 if (
auto KindCost = Entry->Cost[
CostKind])
3150 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3151 if (
auto KindCost = Entry->Cost[
CostKind])
3157 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3158 if (
auto KindCost = Entry->Cost[
CostKind])
3163 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3164 if (
auto KindCost = Entry->Cost[
CostKind])
3169 SimpleDstTy, SimpleSrcTy))
3170 if (
auto KindCost = Entry->Cost[
CostKind])
3175 SimpleDstTy, SimpleSrcTy))
3176 if (
auto KindCost = Entry->Cost[
CostKind])
3182 SimpleDstTy, SimpleSrcTy))
3183 if (
auto KindCost = Entry->Cost[
CostKind])
3187 if (ST->hasF16C()) {
3189 SimpleDstTy, SimpleSrcTy))
3190 if (
auto KindCost = Entry->Cost[
CostKind])
3196 SimpleDstTy, SimpleSrcTy))
3197 if (
auto KindCost = Entry->Cost[
CostKind])
3203 SimpleDstTy, SimpleSrcTy))
3204 if (
auto KindCost = Entry->Cost[
CostKind])
3227 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3228 if (
auto KindCost = Entry->Cost[
CostKind])
3229 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3233 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3234 if (
auto KindCost = Entry->Cost[
CostKind])
3235 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3239 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3240 if (
auto KindCost = Entry->Cost[
CostKind])
3241 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3246 LTDest.second, LTSrc.second))
3247 if (
auto KindCost = Entry->Cost[
CostKind])
3248 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3252 LTDest.second, LTSrc.second))
3253 if (
auto KindCost = Entry->Cost[
CostKind])
3254 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3258 LTDest.second, LTSrc.second))
3259 if (
auto KindCost = Entry->Cost[
CostKind])
3260 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3264 LTDest.second, LTSrc.second))
3265 if (
auto KindCost = Entry->Cost[
CostKind])
3266 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3270 LTDest.second, LTSrc.second))
3271 if (
auto KindCost = Entry->Cost[
CostKind])
3272 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3274 if (ST->hasF16C()) {
3276 LTDest.second, LTSrc.second))
3277 if (
auto KindCost = Entry->Cost[
CostKind])
3278 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3283 LTDest.second, LTSrc.second))
3284 if (
auto KindCost = Entry->Cost[
CostKind])
3285 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3289 LTDest.second, LTSrc.second))
3290 if (
auto KindCost = Entry->Cost[
CostKind])
3291 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3296 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3297 Type *ExtSrc = Src->getWithNewBitWidth(32);
3303 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3313 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3314 Type *TruncDst = Dst->getWithNewBitWidth(32);
3324 return Cost == 0 ? 0 :
N;
3338 Op1Info, Op2Info,
I);
3343 MVT MTy = LT.second;
3346 assert(ISD &&
"Invalid opcode");
3349 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3362 Pred = cast<CmpInst>(
I)->getPredicate();
3364 bool CmpWithConstant =
false;
3365 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3366 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3371 ExtraCost = CmpWithConstant ? 0 : 1;
3376 ExtraCost = CmpWithConstant ? 0 : 1;
3382 ExtraCost = CmpWithConstant ? 1 : 2;
3393 ExtraCost = CmpWithConstant ? 2 : 3;
3400 if (CondTy && !ST->
hasAVX())
3571 if (ST->useSLMArithCosts())
3573 if (
auto KindCost = Entry->Cost[
CostKind])
3574 return LT.first * (ExtraCost + *KindCost);
3578 if (
auto KindCost = Entry->Cost[
CostKind])
3579 return LT.first * (ExtraCost + *KindCost);
3583 if (
auto KindCost = Entry->Cost[
CostKind])
3584 return LT.first * (ExtraCost + *KindCost);
3588 if (
auto KindCost = Entry->Cost[
CostKind])
3589 return LT.first * (ExtraCost + *KindCost);
3593 if (
auto KindCost = Entry->Cost[
CostKind])
3594 return LT.first * (ExtraCost + *KindCost);
3598 if (
auto KindCost = Entry->Cost[
CostKind])
3599 return LT.first * (ExtraCost + *KindCost);
3603 if (
auto KindCost = Entry->Cost[
CostKind])
3604 return LT.first * (ExtraCost + *KindCost);
3608 if (
auto KindCost = Entry->Cost[
CostKind])
3609 return LT.first * (ExtraCost + *KindCost);
3613 if (
auto KindCost = Entry->Cost[
CostKind])
3614 return LT.first * (ExtraCost + *KindCost);
3618 if (
auto KindCost = Entry->Cost[
CostKind])
3619 return LT.first * (ExtraCost + *KindCost);
3627 Op1Info, Op2Info,
I);
3645 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3646 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3647 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3648 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3649 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3650 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3651 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3652 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3653 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3654 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3655 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3656 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3657 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3658 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3659 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3681 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3682 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3683 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3684 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3685 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3686 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3687 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3688 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3689 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3690 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3691 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3692 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3694 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3695 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3696 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3697 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3698 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3699 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3702 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3703 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3725 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3726 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3727 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3728 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3729 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3730 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3731 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3732 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3733 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3734 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3735 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3736 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3737 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3741 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3742 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3743 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3744 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3745 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3746 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3747 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3748 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3749 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3750 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3751 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3752 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3753 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3754 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3755 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3756 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3757 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3758 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3767 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3768 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3769 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3770 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3772 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3774 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3779 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3780 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3781 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3782 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3787 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3788 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3789 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3790 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3791 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3792 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3793 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3794 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3795 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3803 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3804 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3805 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3806 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3807 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3808 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3809 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3810 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3811 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3812 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3813 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3814 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3815 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3816 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3817 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3818 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3819 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3820 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3821 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3822 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3823 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3824 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3825 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3826 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3841 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3842 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3843 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3844 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3845 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3846 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3847 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3848 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3849 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3850 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3851 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3852 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3853 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3854 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3855 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3856 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3865 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3866 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3867 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3868 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3869 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3870 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3871 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3872 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3873 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3874 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3875 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3876 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3877 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3878 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3879 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3880 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3907 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3910 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3911 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3927 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3928 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3929 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3930 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3931 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3932 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3933 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3934 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3935 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3936 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3937 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3938 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3939 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3940 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3941 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3942 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3953 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3954 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3955 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3956 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3957 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3958 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3959 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3960 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3975 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3976 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3977 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3978 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3979 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3980 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3981 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3982 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3983 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3984 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3985 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3986 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3987 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3988 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3991 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3992 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3993 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3994 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3995 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3996 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3997 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3998 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
4005 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
4006 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
4007 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
4008 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
4009 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
4010 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
4011 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
4012 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
4013 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
4014 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
4015 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4016 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
4017 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
4018 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
4019 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
4021 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
4022 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
4034 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
4035 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
4036 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
4037 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
4038 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
4039 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
4040 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
4041 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
4042 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
4043 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
4044 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
4045 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
4046 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
4047 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
4048 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
4050 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
4051 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
4065 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
4067 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4068 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4071 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4072 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4073 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4074 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4087 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4089 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4090 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4091 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4092 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4093 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4094 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4095 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4096 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4097 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4098 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4099 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4100 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4101 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4102 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4103 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4104 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4105 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4106 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4107 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4108 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4109 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4110 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4111 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4112 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4118 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4119 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4120 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4121 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4122 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4123 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4124 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4125 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4126 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4127 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4128 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4129 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4130 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4131 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4132 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4134 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4135 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4146 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4147 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4148 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4149 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4150 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4151 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4152 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4153 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4154 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4155 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4156 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4157 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4158 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4159 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4160 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4162 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4163 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4177 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4178 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4180 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4181 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4206 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4208 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4215 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4217 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4225 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4228 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4233 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4234 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4235 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4236 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4237 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4238 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4239 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4240 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4242 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4247 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4248 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4249 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4250 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4251 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4252 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4253 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4254 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4256 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4259 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4260 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4261 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4269 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4270 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4271 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4272 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4273 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4274 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4275 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4276 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4277 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4278 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4279 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4280 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4283 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4284 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4285 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4286 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4291 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4294 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4295 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4296 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4297 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4298 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4299 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4300 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4301 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4302 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4303 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4304 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4305 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4310 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4311 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4312 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4313 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4314 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4315 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4316 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4317 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4318 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4319 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4321 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4330 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4331 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4332 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4333 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4334 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4335 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4336 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4337 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4338 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4339 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4341 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4349 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4355 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4358 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4361 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4362 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4366 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4369 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4370 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4382 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4385 {
ISD::CTLZ, MVT::i64, { 1, 2, 3, 3 } },
4386 {
ISD::CTLZ, MVT::i32, { 1, 2, 3, 3 } },
4387 {
ISD::CTLZ, MVT::i16, { 2, 2, 3, 3 } },
4390 {
ISD::CTTZ, MVT::i64, { 1, 2, 2, 2 } },
4391 {
ISD::CTTZ, MVT::i32, { 1, 2, 2, 2 } },
4392 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 2 } },
4396 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4397 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4399 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4404 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4405 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4406 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4407 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4414 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4415 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4416 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4422 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4423 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4428 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4429 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4437 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4438 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4440 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4441 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4446 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4447 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4461 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4462 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4464 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4465 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4467 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4468 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4470 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4471 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4494 case Intrinsic::abs:
4497 case Intrinsic::bitreverse:
4500 case Intrinsic::bswap:
4503 case Intrinsic::ctlz:
4506 case Intrinsic::ctpop:
4509 case Intrinsic::cttz:
4512 case Intrinsic::fshl:
4516 if (Args[0] == Args[1]) {
4527 case Intrinsic::fshr:
4532 if (Args[0] == Args[1]) {
4543 case Intrinsic::lrint:
4544 case Intrinsic::llrint: {
4551 case Intrinsic::maxnum:
4552 case Intrinsic::minnum:
4556 case Intrinsic::sadd_sat:
4559 case Intrinsic::smax:
4562 case Intrinsic::smin:
4565 case Intrinsic::ssub_sat:
4568 case Intrinsic::uadd_sat:
4571 case Intrinsic::umax:
4574 case Intrinsic::umin:
4577 case Intrinsic::usub_sat:
4580 case Intrinsic::sqrt:
4583 case Intrinsic::sadd_with_overflow:
4584 case Intrinsic::ssub_with_overflow:
4587 OpTy =
RetTy->getContainedType(0);
4589 case Intrinsic::uadd_with_overflow:
4590 case Intrinsic::usub_with_overflow:
4593 OpTy =
RetTy->getContainedType(0);
4595 case Intrinsic::smul_with_overflow:
4597 OpTy =
RetTy->getContainedType(0);
4599 case Intrinsic::umul_with_overflow:
4601 OpTy =
RetTy->getContainedType(0);
4606 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4607 std::pair<InstructionCost, MVT> LT,
4610 MVT MTy = LT.second;
4617 return LegalizationCost * 1;
4622 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4624 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4626 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4627 if (LI->hasOneUse())
4634 return LegalizationCost * (int)
Cost;
4639 MVT MTy = LT.second;
4642 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4643 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4646 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4647 if (Cst->isAllOnesValue())
4655 if (ST->useGLMDivSqrtCosts())
4657 if (
auto KindCost = Entry->Cost[
CostKind])
4658 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4660 if (ST->useSLMArithCosts())
4662 if (
auto KindCost = Entry->Cost[
CostKind])
4663 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4666 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4667 if (
auto KindCost = Entry->Cost[
CostKind])
4668 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4670 if (ST->hasBITALG())
4671 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4672 if (
auto KindCost = Entry->Cost[
CostKind])
4673 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4675 if (ST->hasVPOPCNTDQ())
4676 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4677 if (
auto KindCost = Entry->Cost[
CostKind])
4678 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4682 if (
auto KindCost = Entry->Cost[
CostKind])
4683 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4687 if (
auto KindCost = Entry->Cost[
CostKind])
4688 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4692 if (
auto KindCost = Entry->Cost[
CostKind])
4693 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4697 if (
auto KindCost = Entry->Cost[
CostKind])
4698 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4702 if (
auto KindCost = Entry->Cost[
CostKind])
4703 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4707 if (
auto KindCost = Entry->Cost[
CostKind])
4708 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4712 if (
auto KindCost = Entry->Cost[
CostKind])
4713 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4717 if (
auto KindCost = Entry->Cost[
CostKind])
4718 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4722 if (
auto KindCost = Entry->Cost[
CostKind])
4723 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4727 if (
auto KindCost = Entry->Cost[
CostKind])
4728 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4732 if (
auto KindCost = Entry->Cost[
CostKind])
4733 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4737 if (
auto KindCost = Entry->Cost[
CostKind])
4738 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4743 if (
auto KindCost = Entry->Cost[
CostKind])
4744 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4747 if (
auto KindCost = Entry->Cost[
CostKind])
4748 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4751 if (ST->hasLZCNT()) {
4754 if (
auto KindCost = Entry->Cost[
CostKind])
4755 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4758 if (
auto KindCost = Entry->Cost[
CostKind])
4759 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4762 if (ST->hasPOPCNT()) {
4765 if (
auto KindCost = Entry->Cost[
CostKind])
4766 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4769 if (
auto KindCost = Entry->Cost[
CostKind])
4770 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4775 if (
auto KindCost = Entry->Cost[
CostKind])
4776 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4779 if (
auto KindCost = Entry->Cost[
CostKind])
4780 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4785 (IID == Intrinsic::fshl || IID == Intrinsic::fshr)) {
4786 Type *CondTy =
RetTy->getWithNewBitWidth(1);
4806 unsigned Index,
const Value *Op0,
4807 const Value *Op1)
const {
4821 if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4822 Opcode == Instruction::InsertElement)) {
4827 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4832 if (Opcode == Instruction::ExtractElement) {
4838 if (Opcode == Instruction::InsertElement) {
4846 if (Index != -1U && (Opcode == Instruction::ExtractElement ||
4847 Opcode == Instruction::InsertElement)) {
4849 if (Opcode == Instruction::ExtractElement &&
4851 cast<FixedVectorType>(Val)->getNumElements() > 1)
4858 if (!LT.second.isVector())
4862 unsigned SizeInBits = LT.second.getSizeInBits();
4863 unsigned NumElts = LT.second.getVectorNumElements();
4864 unsigned SubNumElts = NumElts;
4865 Index = Index % NumElts;
4869 if (SizeInBits > 128) {
4870 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4871 unsigned NumSubVecs = SizeInBits / 128;
4872 SubNumElts = NumElts / NumSubVecs;
4873 if (SubNumElts <= Index) {
4874 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4875 Index %= SubNumElts;
4879 MVT MScalarTy = LT.second.getScalarType();
4880 auto IsCheapPInsrPExtrInsertPS = [&]() {
4884 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4886 (MScalarTy == MVT::f32 && ST->
hasSSE1() && Index == 0 &&
4887 Opcode == Instruction::InsertElement) ||
4888 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4889 Opcode == Instruction::InsertElement);
4897 (Opcode != Instruction::InsertElement || !Op0 ||
4898 isa<UndefValue>(Op0)))
4899 return RegisterFileMoveCost;
4901 if (Opcode == Instruction::InsertElement &&
4902 isa_and_nonnull<UndefValue>(Op0)) {
4904 if (isa_and_nonnull<LoadInst>(Op1))
4905 return RegisterFileMoveCost;
4906 if (!IsCheapPInsrPExtrInsertPS()) {
4909 return 2 + RegisterFileMoveCost;
4911 return 1 + RegisterFileMoveCost;
4916 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4917 return 1 + RegisterFileMoveCost;
4921 assert(ISD &&
"Unexpected vector opcode");
4922 if (ST->useSLMArithCosts())
4924 return Entry->Cost + RegisterFileMoveCost;
4927 if (IsCheapPInsrPExtrInsertPS())
4928 return 1 + RegisterFileMoveCost;
4937 if (Opcode == Instruction::InsertElement) {
4938 auto *SubTy = cast<VectorType>(Val);
4946 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4950 RegisterFileMoveCost;
4958 cast<FixedVectorType>(Ty)->getNumElements() &&
4959 "Vector size mismatch");
4962 MVT MScalarTy = LT.second.getScalarType();
4963 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4966 constexpr unsigned LaneBitWidth = 128;
4967 assert((LegalVectorBitWidth < LaneBitWidth ||
4968 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4971 const int NumLegalVectors = LT.first.getValue();
4972 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4978 if (Insert && !ForPoisonSrc) {
4984 if (!DemandedElts[
I])
4988 VL.
empty() ?
nullptr : VL[
I]);
4994 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4996 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4999 if (LegalVectorBitWidth <= LaneBitWidth) {
5015 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
5016 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
5017 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
5018 unsigned NumLegalElts =
5019 LT.second.getVectorNumElements() * NumLegalVectors;
5021 "Vector has been legalized to smaller element count");
5022 assert((NumLegalElts % NumLanesTotal) == 0 &&
5023 "Unexpected elts per lane");
5024 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
5026 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5030 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5032 NumEltsPerLane, NumEltsPerLane *
I);
5033 if (LaneEltMask.
isZero())
5044 APInt AffectedLanes =
5047 AffectedLanes, NumLegalVectors,
true);
5048 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
5049 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
5050 unsigned I = NumLegalLanes * LegalVec + Lane;
5053 if (!AffectedLanes[
I] ||
5054 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
5061 }
else if (LT.second.isVector()) {
5072 unsigned NumElts = LT.second.getVectorNumElements();
5075 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
5084 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
5085 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
5086 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
5090 if (LT.second.isVector()) {
5091 unsigned NumLegalElts =
5092 LT.second.getVectorNumElements() * NumLegalVectors;
5094 "Vector has been legalized to smaller element count");
5098 if (LegalVectorBitWidth > LaneBitWidth) {
5099 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
5100 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
5101 assert((NumLegalElts % NumLanesTotal) == 0 &&
5102 "Unexpected elts per lane");
5103 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
5107 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5111 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5113 NumEltsPerLane,
I * NumEltsPerLane);
5114 if (LaneEltMask.
isZero())
5117 I * NumEltsPerLane, LaneTy);
5119 LaneTy, LaneEltMask,
false, Extract,
CostKind);
5136 int VF,
const APInt &DemandedDstElts,
5142 auto bailout = [&]() {
5152 unsigned PromEltTyBits = EltTyBits;
5153 switch (EltTyBits) {
5184 int NumDstElements = VF * ReplicationFactor;
5198 if (PromEltTyBits != EltTyBits) {
5204 Instruction::SExt, PromSrcVecTy, SrcVecTy,
5211 ReplicationFactor, VF,
5217 "We expect that the legalization doesn't affect the element width, "
5218 "doesn't coalesce/split elements.");
5221 unsigned NumDstVectors =
5222 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5231 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5232 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5238 return NumDstVectorsDemanded * SingleShuffleCost;
5249 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5252 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5253 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5260 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5270 auto *VTy = dyn_cast<FixedVectorType>(Src);
5275 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5281 if (!VTy || !LT.second.isVector()) {
5283 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5286 bool IsLoad = Opcode == Instruction::Load;
5288 Type *EltTy = VTy->getElementType();
5293 const unsigned SrcNumElt = VTy->getNumElements();
5296 int NumEltRemaining = SrcNumElt;
5298 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5300 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5303 const unsigned XMMBits = 128;
5304 if (XMMBits % EltTyBits != 0)
5308 const int NumEltPerXMM = XMMBits / EltTyBits;
5312 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5313 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5315 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5319 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5321 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5322 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5323 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5324 "Unless we haven't halved the op size yet, "
5325 "we have less than two op's sized units of work left.");
5327 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5331 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5332 "After halving sizes, the vector elt count is no longer a multiple "
5333 "of number of elements per operation?");
5334 auto *CoalescedVecTy =
5335 CurrNumEltPerOp == 1
5339 EltTyBits * CurrNumEltPerOp),
5340 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5343 "coalesciing elements doesn't change vector width.");
5345 while (NumEltRemaining > 0) {
5346 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5350 if (NumEltRemaining < CurrNumEltPerOp &&
5351 (!IsLoad || Alignment < CurrOpSizeBytes) && CurrOpSizeBytes != 1)
5359 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5361 else if (CurrOpSizeBytes < 4)
5371 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5374 if (SubVecEltsLeft == 0) {
5375 SubVecEltsLeft += CurrVecTy->getNumElements();
5381 VTy, VTy, {},
CostKind, NumEltDone(), CurrVecTy);
5388 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5389 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5390 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5391 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5392 APInt DemandedElts =
5394 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5395 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5400 SubVecEltsLeft -= CurrNumEltPerOp;
5401 NumEltRemaining -= CurrNumEltPerOp;
5406 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5415 bool IsLoad = (Instruction::Load == Opcode);
5416 bool IsStore = (Instruction::Store == Opcode);
5418 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5423 unsigned NumElem = SrcVTy->getNumElements();
5431 MaskTy, DemandedElts,
false,
true,
CostKind);
5436 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5438 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5442 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5450 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5452 return Cost + LT.first;
5454 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5455 LT.second.getVectorNumElements() == NumElem)
5464 (
unsigned)LT.first.getValue() *
5473 return Cost + LT.first * (IsLoad ? 2 : 8);
5476 return Cost + LT.first;
5483 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5487 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5489 return getGEPCost(BaseGEP->getSourceElementType(),
5490 BaseGEP->getPointerOperand(), Indices,
nullptr,
5506 const unsigned NumVectorInstToHideOverhead = 10;
5519 return NumVectorInstToHideOverhead;
5529 std::optional<FastMathFlags> FMF,
5570 assert(ISD &&
"Invalid opcode");
5578 if (ST->useSLMArithCosts())
5593 MVT MTy = LT.second;
5595 auto *ValVTy = cast<FixedVectorType>(ValTy);
5608 if (LT.first != 1 && MTy.
isVector() &&
5614 ArithmeticCost *= LT.first - 1;
5617 if (ST->useSLMArithCosts())
5619 return ArithmeticCost + Entry->Cost;
5623 return ArithmeticCost + Entry->Cost;
5627 return ArithmeticCost + Entry->Cost;
5676 if (ValVTy->getElementType()->isIntegerTy(1)) {
5678 if (LT.first != 1 && MTy.
isVector() &&
5684 ArithmeticCost *= LT.first - 1;
5688 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5689 return ArithmeticCost + Entry->Cost;
5692 return ArithmeticCost + Entry->Cost;
5695 return ArithmeticCost + Entry->Cost;
5698 return ArithmeticCost + Entry->Cost;
5703 unsigned NumVecElts = ValVTy->getNumElements();
5704 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5714 if (LT.first != 1 && MTy.
isVector() &&
5720 ReductionCost *= LT.first - 1;
5726 while (NumVecElts > 1) {
5728 unsigned Size = NumVecElts * ScalarSize;
5736 }
else if (
Size == 128) {
5739 if (ValVTy->isFloatingPointTy())
5747 }
else if (
Size == 64) {
5750 if (ValVTy->isFloatingPointTy())
5763 Instruction::LShr, ShiftTy,
CostKind,
5790 MVT MTy = LT.second;
5794 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5798 "Expected float point or integer vector type.");
5799 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5867 auto *ValVTy = cast<FixedVectorType>(ValTy);
5868 unsigned NumVecElts = ValVTy->getNumElements();
5872 if (LT.first != 1 && MTy.
isVector() &&
5878 MinMaxCost *= LT.first - 1;
5884 return MinMaxCost + Entry->Cost;
5888 return MinMaxCost + Entry->Cost;
5892 return MinMaxCost + Entry->Cost;
5896 return MinMaxCost + Entry->Cost;
5908 while (NumVecElts > 1) {
5910 unsigned Size = NumVecElts * ScalarSize;
5918 }
else if (
Size == 128) {
5928 }
else if (
Size == 64) {
5989 if (BitSize % 64 != 0)
5990 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5995 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
6001 return std::max<InstructionCost>(1,
Cost);
6011 unsigned ImmBitWidth = Imm.getBitWidth();
6018 unsigned ImmIdx = ~0U;
6022 case Instruction::GetElementPtr:
6029 case Instruction::Store:
6032 case Instruction::ICmp:
6037 if (
Idx == 1 && ImmBitWidth == 64) {
6038 uint64_t ImmVal = Imm.getZExtValue();
6039 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
6042 if (
auto *Cmp = dyn_cast_or_null<CmpInst>(Inst)) {
6043 if (Cmp->isEquality()) {
6052 case Instruction::And:
6056 if (
Idx == 1 && ImmBitWidth == 64 && Imm.isIntN(32))
6059 if (
Idx == 1 && ImmBitWidth == 64 && ST->is64Bit() && ST->hasBMI() &&
6064 case Instruction::Add:
6065 case Instruction::Sub:
6067 if (
Idx == 1 && ImmBitWidth == 64 && Imm.getZExtValue() == 0x80000000)
6071 case Instruction::UDiv:
6072 case Instruction::SDiv:
6073 case Instruction::URem:
6074 case Instruction::SRem:
6079 case Instruction::Mul:
6080 case Instruction::Or:
6081 case Instruction::Xor:
6085 case Instruction::Shl:
6086 case Instruction::LShr:
6087 case Instruction::AShr:
6091 case Instruction::Trunc:
6092 case Instruction::ZExt:
6093 case Instruction::SExt:
6094 case Instruction::IntToPtr:
6095 case Instruction::PtrToInt:
6096 case Instruction::BitCast:
6097 case Instruction::PHI:
6098 case Instruction::Call:
6099 case Instruction::Select:
6100 case Instruction::Ret:
6101 case Instruction::Load:
6105 if (
Idx == ImmIdx) {
6131 case Intrinsic::sadd_with_overflow:
6132 case Intrinsic::uadd_with_overflow:
6133 case Intrinsic::ssub_with_overflow:
6134 case Intrinsic::usub_with_overflow:
6135 case Intrinsic::smul_with_overflow:
6136 case Intrinsic::umul_with_overflow:
6137 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
6140 case Intrinsic::experimental_stackmap:
6141 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6144 case Intrinsic::experimental_patchpoint_void:
6145 case Intrinsic::experimental_patchpoint:
6146 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6162int X86TTIImpl::getGatherOverhead()
const {
6175int X86TTIImpl::getScatterOverhead()
const {
6189 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
6190 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
6200 if (IndexSize < 64 || !
GEP)
6203 unsigned NumOfVarIndices = 0;
6204 const Value *Ptrs =
GEP->getPointerOperand();
6207 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
6208 if (isa<Constant>(
GEP->getOperand(
I)))
6210 Type *IndxTy =
GEP->getOperand(
I)->getType();
6211 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
6212 IndxTy = IndexVTy->getElementType();
6214 !isa<SExtInst>(
GEP->getOperand(
I))) ||
6215 ++NumOfVarIndices > 1)
6218 return (
unsigned)32;
6223 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
6224 ? getIndexSizeInBits(
Ptr,
DL)
6232 std::max(IdxsLT.first, SrcLT.first).getValue();
6233 if (SplitFactor > 1) {
6237 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6247 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6248 : getScatterOverhead();
6255 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6258 if ((Opcode == Instruction::Load &&
6261 Align(Alignment)))) ||
6262 (Opcode == Instruction::Store &&
6265 Align(Alignment)))))
6271 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6272 PtrTy = dyn_cast<PointerType>(
6273 cast<VectorType>(
Ptr->getType())->getElementType());
6274 assert(PtrTy &&
"Unexpected type for Ptr argument");
6276 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6290 return ST->hasMacroFusion() || ST->hasBranchFusion();
6303 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6313 return IntWidth == 32 || IntWidth == 64 ||
6314 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6322 if (isa<VectorType>(DataTy) &&
6324 return ST->hasCF() &&
6335 if (isa<VectorType>(DataTy) &&
6337 return ST->hasCF() &&
6348 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6365 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6387 if (!isa<VectorType>(DataTy))
6397 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6406 return IntWidth == 32 || IntWidth == 64 ||
6407 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6411 Align Alignment)
const {
6415bool X86TTIImpl::supportsGather()
const {
6423 Align Alignment)
const {
6430 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6431 return NumElts == 1 ||
6432 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6436 Align Alignment)
const {
6448 return IntWidth == 32 || IntWidth == 64;
6452 if (!supportsGather() || !ST->preferGather())
6467 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6468 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6473 for (
int Lane : seq<int>(0, NumElements)) {
6474 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6476 if (Lane % 2 == 0 &&
Opc != Instruction::FSub)
6478 if (Lane % 2 == 1 &&
Opc != Instruction::FAdd)
6482 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6484 return ST->
hasSSE3() && NumElements % 4 == 0;
6486 return ST->
hasSSE3() && NumElements % 2 == 0;
6492 if (!ST->
hasAVX512() || !ST->preferScatter())
6505 if (
I->getOpcode() == Instruction::FDiv)
6519 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6521 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6524 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6525 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6526 if (RealCallerBits == RealCalleeBits)
6531 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6535 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6541 for (
Value *Arg : CB->args())
6542 Types.push_back(Arg->getType());
6543 if (!CB->getType()->isVoidTy())
6544 Types.push_back(CB->getType());
6547 auto IsSimpleTy = [](
Type *Ty) {
6548 return !Ty->isVectorTy() && !Ty->isAggregateType();
6550 if (
all_of(Types, IsSimpleTy))
6580 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6589 Options.AllowOverlappingLoads =
true;
6594 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6595 Options.LoadSizes.push_back(64);
6596 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6597 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6599 if (ST->is64Bit()) {
6600 Options.LoadSizes.push_back(8);
6602 Options.LoadSizes.push_back(4);
6603 Options.LoadSizes.push_back(2);
6604 Options.LoadSizes.push_back(1);
6609 return supportsGather();
6620 return !(ST->isAtom());
6631 bool UseMaskForGaps)
const {
6641 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6647 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6660 if (UseMaskedMemOp) {
6662 for (
unsigned Index : Indices) {
6663 assert(Index < Factor &&
"Invalid index for interleaved memory op");
6664 for (
unsigned Elm = 0; Elm < VF; Elm++)
6665 DemandedLoadStoreElts.
setBit(Index + Elm * Factor);
6672 UseMaskForGaps ? DemandedLoadStoreElts
6681 if (UseMaskForGaps) {
6687 if (Opcode == Instruction::Load) {
6694 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6695 {3, MVT::v16i8, 12},
6696 {3, MVT::v32i8, 14},
6697 {3, MVT::v64i8, 22},
6700 if (
const auto *Entry =
6702 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6712 ShuffleKind, SingleMemOpTy, SingleMemOpTy, {},
CostKind, 0,
nullptr);
6714 unsigned NumOfLoadsInInterleaveGrp =
6715 Indices.
size() ? Indices.
size() : Factor;
6724 unsigned NumOfUnfoldedLoads =
6725 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6728 unsigned NumOfShufflesPerResult =
6729 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6736 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6739 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6746 assert(Opcode == Instruction::Store &&
6747 "Expected Store Instruction at this point");
6749 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6750 {3, MVT::v16i8, 12},
6751 {3, MVT::v32i8, 14},
6752 {3, MVT::v64i8, 26},
6755 {4, MVT::v16i8, 11},
6756 {4, MVT::v32i8, 14},
6760 if (
const auto *Entry =
6762 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6767 unsigned NumOfSources = Factor;
6771 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6775 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6778 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6786 bool UseMaskForCond,
bool UseMaskForGaps)
const {
6787 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6789 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6790 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6795 return ST->hasBWI();
6797 return ST->hasBF16();
6800 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6802 Opcode, VecTy, Factor, Indices, Alignment,
6805 if (UseMaskForCond || UseMaskForGaps)
6808 UseMaskForCond, UseMaskForGaps);
6828 unsigned VF = VecTy->getNumElements() / Factor;
6829 Type *ScalarTy = VecTy->getElementType();
6861 {2, MVT::v16i16, 9},
6862 {2, MVT::v32i16, 18},
6865 {2, MVT::v16i32, 8},
6866 {2, MVT::v32i32, 16},
6870 {2, MVT::v16i64, 16},
6871 {2, MVT::v32i64, 32},
6876 {3, MVT::v16i8, 11},
6877 {3, MVT::v32i8, 14},
6882 {3, MVT::v16i16, 28},
6883 {3, MVT::v32i16, 56},
6888 {3, MVT::v16i32, 14},
6889 {3, MVT::v32i32, 32},
6893 {3, MVT::v8i64, 10},
6894 {3, MVT::v16i64, 20},
6899 {4, MVT::v16i8, 24},
6900 {4, MVT::v32i8, 56},
6903 {4, MVT::v4i16, 17},
6904 {4, MVT::v8i16, 33},
6905 {4, MVT::v16i16, 75},
6906 {4, MVT::v32i16, 150},
6910 {4, MVT::v8i32, 16},
6911 {4, MVT::v16i32, 32},
6912 {4, MVT::v32i32, 68},
6916 {4, MVT::v8i64, 20},
6917 {4, MVT::v16i64, 40},
6922 {6, MVT::v16i8, 43},
6923 {6, MVT::v32i8, 82},
6925 {6, MVT::v2i16, 13},
6927 {6, MVT::v8i16, 39},
6928 {6, MVT::v16i16, 106},
6929 {6, MVT::v32i16, 212},
6932 {6, MVT::v4i32, 15},
6933 {6, MVT::v8i32, 31},
6934 {6, MVT::v16i32, 64},
6937 {6, MVT::v4i64, 18},
6938 {6, MVT::v8i64, 36},
6943 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6957 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6962 {2, MVT::v16i16, 4},
6963 {2, MVT::v32i16, 8},
6967 {2, MVT::v16i32, 8},
6968 {2, MVT::v32i32, 16},
6973 {2, MVT::v16i64, 16},
6974 {2, MVT::v32i64, 32},
6979 {3, MVT::v16i8, 11},
6980 {3, MVT::v32i8, 13},
6984 {3, MVT::v8i16, 12},
6985 {3, MVT::v16i16, 27},
6986 {3, MVT::v32i16, 54},
6990 {3, MVT::v8i32, 11},
6991 {3, MVT::v16i32, 22},
6992 {3, MVT::v32i32, 48},
6996 {3, MVT::v8i64, 12},
6997 {3, MVT::v16i64, 24},
7003 {4, MVT::v32i8, 12},
7007 {4, MVT::v8i16, 10},
7008 {4, MVT::v16i16, 32},
7009 {4, MVT::v32i16, 64},
7013 {4, MVT::v8i32, 16},
7014 {4, MVT::v16i32, 32},
7015 {4, MVT::v32i32, 64},
7019 {4, MVT::v8i64, 20},
7020 {4, MVT::v16i64, 40},
7025 {6, MVT::v16i8, 27},
7026 {6, MVT::v32i8, 90},
7028 {6, MVT::v2i16, 10},
7029 {6, MVT::v4i16, 15},
7030 {6, MVT::v8i16, 21},
7031 {6, MVT::v16i16, 58},
7032 {6, MVT::v32i16, 90},
7035 {6, MVT::v4i32, 12},
7036 {6, MVT::v8i32, 33},
7037 {6, MVT::v16i32, 66},
7040 {6, MVT::v4i64, 15},
7041 {6, MVT::v8i64, 30},
7044 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
7055 if (Opcode == Instruction::Load) {
7056 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
7060 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
7064 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
7066 return GetDiscountedCost(Entry);
7069 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
7071 return GetDiscountedCost(Entry);
7074 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
7076 return GetDiscountedCost(Entry);
7078 assert(Opcode == Instruction::Store &&
7079 "Expected Store Instruction at this point");
7081 "Interleaved store only supports fully-interleaved groups.");
7083 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
7085 return MemOpCosts + Entry->Cost;
7088 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
7090 return MemOpCosts + Entry->Cost;
7095 UseMaskForCond, UseMaskForGaps);
7100 bool HasBaseReg, int64_t Scale,
7101 unsigned AddrSpace)
const {
7129 return AM.
Scale != 0;
7143 if (ST->hasXOP() && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
7148 if (ST->
hasAVX2() && (Bits == 32 || Bits == 64))
7152 if (ST->hasBWI() && Bits == 16)
7161 Type *ScalarValTy)
const {
7162 if (ST->hasF16C() && ScalarMemTy->
isHalfTy()) {
7176 if (
I->getOpcode() == Instruction::Mul &&
7178 for (
auto &
Op :
I->operands()) {
7180 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
7188 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
7197 return !Ops.
empty();
7203 int ShiftAmountOpNum = -1;
7205 ShiftAmountOpNum = 1;
7206 else if (
auto *
II = dyn_cast<IntrinsicInst>(
I)) {
7207 if (
II->getIntrinsicID() == Intrinsic::fshl ||
7208 II->getIntrinsicID() == Intrinsic::fshr)
7209 ShiftAmountOpNum = 2;
7212 if (ShiftAmountOpNum == -1)
7215 auto *Shuf = dyn_cast<ShuffleVectorInst>(
I->getOperand(ShiftAmountOpNum));
7218 Ops.
push_back(&
I->getOperandUse(ShiftAmountOpNum));
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static unsigned getNumElements(Type *Ty)
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const override
bool isLegalNTLoad(Type *DataType, Align Alignment) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace) const override
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
unsigned getRegisterClassForType(bool Vector, Type *Ty) const override
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalNTStore(Type *DataType, Align Alignment) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isVectorShiftByScalarCheap(Type *Ty) const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
unsigned getAtomicMemIntrinsicMaxElementSize() const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) const override
InstructionCost getBranchMispredictPenalty() const override
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override
bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const override
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) const
bool enableInterleavedAccessVectorization() const override
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
Calculate the cost of Gather / Scatter operation.
unsigned getNumberOfRegisters(unsigned ClassID) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace) const override
bool hasDivRemOp(Type *DataType, bool IsSigned) const override
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool supportsEfficientVectorElementLoadStore() const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
InstructionCost getIntImmCost(int64_t) const
Calculate the cost of materializing a 64-bit value.
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool canMacroFuseCmp() const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
bool prefersVectorizedAddressing() const override
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const override
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is Skew mod Align.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.