260 if (Opcode == Instruction::Mul && Ty->isVectorTy() &&
261 Ty->getPrimitiveSizeInBits() <= 64 && Ty->getScalarSizeInBits() == 8) {
276 int ISD = TLI->InstructionOpcodeToISD(Opcode);
279 if (
ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
280 (LT.second.getScalarType() == MVT::i32 ||
281 LT.second.getScalarType() == MVT::i64)) {
283 bool Op1Signed =
false, Op2Signed =
false;
286 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
287 bool SignedMode = Op1Signed || Op2Signed;
292 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
293 LT.second.getScalarType() == MVT::i32) {
299 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
301 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
303 bool IsZeroExtended = !Op1Signed || !Op2Signed;
304 bool IsConstant = Op1Constant || Op2Constant;
305 bool IsSext = Op1Sext || Op2Sext;
306 if (IsConstant || IsZeroExtended || IsSext)
314 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
317 if (!SignedMode && OpMinSize <= 8)
321 if (!SignedMode && OpMinSize <= 16)
328 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
381 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
382 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
383 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
384 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
385 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
386 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
387 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
388 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
389 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
393 if (
const auto *Entry =
395 if (
auto KindCost = Entry->Cost[
CostKind])
396 return LT.first * *KindCost;
399 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
400 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
401 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
402 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
403 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
404 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
405 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
406 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
407 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
409 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
410 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
411 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
412 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
413 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
414 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
418 if (
const auto *Entry =
420 if (
auto KindCost = Entry->Cost[
CostKind])
421 return LT.first * *KindCost;
424 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
425 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
426 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
428 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
429 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
430 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
432 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
433 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
434 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
435 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
436 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
437 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
439 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
440 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
441 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
442 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
443 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
444 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
445 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
454 if (
const auto *Entry =
456 if (
auto KindCost = Entry->Cost[
CostKind])
457 return LT.first * *KindCost;
460 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
461 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
462 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
463 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
464 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
465 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
467 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
477 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
481 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
482 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
483 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
484 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
485 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
486 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
495 if (
const auto *Entry =
497 if (
auto KindCost = Entry->Cost[
CostKind])
498 return LT.first * *KindCost;
501 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
502 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
503 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
504 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
505 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
506 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
508 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
518 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
522 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
523 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
524 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
525 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
526 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
527 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
537 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
538 if (
const auto *Entry =
540 if (
auto KindCost = Entry->Cost[
CostKind])
541 return LT.first * *KindCost;
544 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
545 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
546 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
548 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
549 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
550 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
552 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
553 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
554 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
556 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
557 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
558 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
568 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
569 if (
const auto *Entry =
571 if (
auto KindCost = Entry->Cost[
CostKind])
572 return LT.first * *KindCost;
587 if (
const auto *Entry =
589 if (
auto KindCost = Entry->Cost[
CostKind])
590 return LT.first * *KindCost;
610 if (
const auto *Entry =
612 if (
auto KindCost = Entry->Cost[
CostKind])
613 return LT.first * *KindCost;
634 if (
auto KindCost = Entry->Cost[
CostKind])
635 return LT.first * *KindCost;
656 if (
auto KindCost = Entry->Cost[
CostKind])
657 return LT.first * *KindCost;
665 if (
const auto *Entry =
667 if (
auto KindCost = Entry->Cost[
CostKind])
668 return LT.first * *KindCost;
689 if (
auto KindCost = Entry->Cost[
CostKind])
690 return LT.first * *KindCost;
693 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
694 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
695 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
696 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
697 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
698 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
699 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
700 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
701 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
703 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
704 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
705 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
709 if (
const auto *Entry =
711 if (
auto KindCost = Entry->Cost[
CostKind])
712 return LT.first * *KindCost;
715 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
716 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
717 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
719 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
720 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
721 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
723 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
724 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
725 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
726 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
727 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
728 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
729 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
732 if (ST->hasAVX512() && Op2Info.
isUniform())
733 if (
const auto *Entry =
735 if (
auto KindCost = Entry->Cost[
CostKind])
736 return LT.first * *KindCost;
740 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
741 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
742 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
743 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
744 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
745 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
747 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
757 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
758 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
759 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
761 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
762 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
763 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
764 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
765 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
766 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
769 if (ST->hasAVX2() && Op2Info.
isUniform())
770 if (
const auto *Entry =
772 if (
auto KindCost = Entry->Cost[
CostKind])
773 return LT.first * *KindCost;
776 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
777 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
778 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
779 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
780 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
781 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
783 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
793 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
794 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
795 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
797 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
798 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
799 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
800 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
801 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
802 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
806 if (ST->hasAVX() && Op2Info.
isUniform() &&
807 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
808 if (
const auto *Entry =
810 if (
auto KindCost = Entry->Cost[
CostKind])
811 return LT.first * *KindCost;
815 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
816 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
817 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
819 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
820 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
821 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
823 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
824 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
825 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
827 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
828 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
829 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
832 if (ST->hasSSE2() && Op2Info.
isUniform() &&
833 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
834 if (
const auto *Entry =
836 if (
auto KindCost = Entry->Cost[
CostKind])
837 return LT.first * *KindCost;
840 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
841 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
842 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
848 if (
auto KindCost = Entry->Cost[
CostKind])
849 return LT.first * *KindCost;
852 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
853 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
854 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
855 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
856 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
857 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
858 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
859 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
860 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
862 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
863 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
864 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
865 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
866 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
867 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
868 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
869 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
870 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
872 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
873 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
875 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
876 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
877 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
878 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
880 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
881 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
883 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
884 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
885 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
886 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
888 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
889 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
890 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
891 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
897 if (
auto KindCost = Entry->Cost[
CostKind])
898 return LT.first * *KindCost;
901 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
902 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
903 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
905 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
906 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
907 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
909 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
910 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
911 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
919 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
920 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
921 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
922 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
923 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
924 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
925 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
926 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
927 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
929 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
930 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
932 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
933 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
935 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
936 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
937 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
938 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
940 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
941 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
942 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
943 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
945 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
946 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
947 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
948 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
950 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
951 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
952 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
953 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
958 { ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
959 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
960 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
961 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
962 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
963 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
964 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
965 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
968 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
969 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
970 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
971 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
973 { ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
974 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
975 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
976 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
977 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
978 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
979 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
980 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
983 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
984 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
985 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
986 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
991 if (
auto KindCost = Entry->Cost[
CostKind])
992 return LT.first * *KindCost;
997 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
998 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
999 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
1000 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
1001 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
1002 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
1003 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
1004 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1005 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
1006 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1009 if (ST->hasAVX512()) {
1018 if (ST->hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1019 if (
ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1027 if (
auto KindCost = Entry->Cost[
CostKind])
1028 return LT.first * *KindCost;
1033 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1034 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1035 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1036 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1037 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1038 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1040 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1041 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1042 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1043 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1044 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1046 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1047 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1048 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1049 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1050 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1051 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1052 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1053 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1054 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1055 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1056 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1057 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1067 if (
const auto *Entry =
1069 if (
auto KindCost = Entry->Cost[
CostKind])
1070 return LT.first * *KindCost;
1077 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
1078 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
1083 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1084 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1085 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1086 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1089 if (ST->useGLMDivSqrtCosts())
1091 if (
auto KindCost = Entry->Cost[
CostKind])
1092 return LT.first * *KindCost;
1095 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1096 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1097 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1098 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1099 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1100 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1101 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1102 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1103 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1104 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1105 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1106 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1112 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1114 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1115 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1118 if (ST->useSLMArithCosts())
1120 if (
auto KindCost = Entry->Cost[
CostKind])
1121 return LT.first * *KindCost;
1124 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1125 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1126 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1127 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1129 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1130 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1131 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1132 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1134 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1135 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1136 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1137 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1138 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1139 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1141 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1142 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1143 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1144 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1145 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1146 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1147 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1148 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1150 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1151 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1152 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1153 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1154 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1155 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1156 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1160 { ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1161 { ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1163 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1171 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1172 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1173 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1174 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1175 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1177 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1178 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1179 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1180 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1181 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1182 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1184 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1185 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1186 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1187 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1188 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1189 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1195 if (
auto KindCost = Entry->Cost[
CostKind])
1196 return LT.first * *KindCost;
1202 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1203 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1204 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1205 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1206 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1207 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1211 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1212 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1213 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1214 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1216 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1217 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1218 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1219 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1221 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1222 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1223 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1224 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1226 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1227 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1228 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1229 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1230 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1231 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1232 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1233 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1234 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1235 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1237 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1238 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1239 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1240 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1241 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1242 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1243 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1244 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1246 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1247 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1248 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1249 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1250 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1251 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1252 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1253 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1255 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1256 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1257 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1258 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1259 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1260 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1261 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1262 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1264 { ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1265 { ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1267 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1268 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1269 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1270 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1271 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1272 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1274 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1275 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1276 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1277 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1278 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1279 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1281 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1282 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1283 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1284 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1285 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1286 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1288 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1289 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1290 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1291 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1292 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1293 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1298 if (
auto KindCost = Entry->Cost[
CostKind])
1299 return LT.first * *KindCost;
1302 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1303 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1304 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1305 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1307 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1308 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1309 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1310 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1312 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1313 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1314 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1315 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1317 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1318 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1319 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1320 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1322 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1327 if (
auto KindCost = Entry->Cost[
CostKind])
1328 return LT.first * *KindCost;
1331 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1332 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1333 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1335 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1336 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1337 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1338 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1340 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1341 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1342 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1343 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1345 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1350 if (
auto KindCost = Entry->Cost[
CostKind])
1351 return LT.first * *KindCost;
1354 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1359 if (
auto KindCost = Entry->Cost[
CostKind])
1360 return LT.first * *KindCost;
1365 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1366 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1367 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1368 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1370 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1371 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1372 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1373 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1375 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1376 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1377 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1378 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1380 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1381 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1382 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1383 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1385 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1386 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1387 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1388 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1390 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1391 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1392 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1393 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1395 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1396 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1398 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1399 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1400 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1401 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1405 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1406 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1407 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1408 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1410 { ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1411 { ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1412 { ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1413 { ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1415 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1416 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1417 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1419 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1420 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1421 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1423 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1424 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1429 if (
auto KindCost = Entry->Cost[
CostKind])
1430 return LT.first * *KindCost;
1433 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1434 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1436 { ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1437 { ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1439 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1440 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1442 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1443 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1445 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1446 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1451 if (
auto KindCost = Entry->Cost[
CostKind])
1452 return LT.first * *KindCost;
1457 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1462 if (
auto KindCost = Entry->Cost[
CostKind])
1463 return LT.first * *KindCost;
1474 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1475 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1476 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1478 { ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1479 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1480 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1481 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1482 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1486 if (
auto KindCost = Entry->Cost[
CostKind])
1487 return LT.first * *KindCost;
1501 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1543 "Expected the Mask to match the return size if given");
1545 "Expected the same scalar types");
1554 if (!Args.empty() &&
1560 Mask.size() == (2 * SrcTy->getElementCount().getKnownMinValue()) &&
1581 (ST->hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1588 bool IsInLaneShuffle =
false;
1589 bool IsSingleElementMask =
false;
1590 if (SrcTy->getPrimitiveSizeInBits() > 0 &&
1591 (SrcTy->getPrimitiveSizeInBits() % 128) == 0 &&
1592 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
1593 Mask.size() == SrcTy->getElementCount().getKnownMinValue()) {
1594 unsigned NumLanes = SrcTy->getPrimitiveSizeInBits() / 128;
1595 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1596 if ((Mask.size() % NumLanes) == 0) {
1599 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1600 (
P.index() / NumEltsPerLane);
1602 IsSingleElementMask =
1603 (Mask.size() - 1) ==
static_cast<unsigned>(
count_if(Mask, [](
int M) {
1610 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1611 LT.second = LT.second.changeVectorElementType(MVT::f16);
1616 int NumElts = LT.second.getVectorNumElements();
1617 if ((Index % NumElts) == 0)
1620 if (SubLT.second.isVector()) {
1621 int NumSubElts = SubLT.second.getVectorNumElements();
1622 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1631 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1632 (NumSubElts % OrigSubElts) == 0 &&
1633 LT.second.getVectorElementType() ==
1634 SubLT.second.getVectorElementType() &&
1635 LT.second.getVectorElementType().getSizeInBits() ==
1636 SrcTy->getElementType()->getPrimitiveSizeInBits()) {
1637 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1638 "Unexpected number of elements!");
1640 LT.second.getVectorNumElements());
1642 SubLT.second.getVectorNumElements());
1643 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1646 ExtractIndex, SubTy);
1651 return ExtractCost + 1;
1654 "Unexpected vector size");
1656 return ExtractCost + 2;
1670 int NumElts = DstLT.second.getVectorNumElements();
1672 if (SubLT.second.isVector()) {
1673 int NumSubElts = SubLT.second.getVectorNumElements();
1674 bool MatchingTypes =
1675 NumElts == NumSubElts &&
1677 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1684 if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
1685 SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
1696 EVT VT = TLI->getValueType(
DL, SrcTy);
1730 if (
const auto *Entry =
1732 if (
auto KindCost = Entry->Cost[
CostKind])
1733 return LT.first * *KindCost;
1739 if (LT.first != 1) {
1740 MVT LegalVT = LT.second;
1743 SrcTy->getElementType()->getPrimitiveSizeInBits() &&
1746 unsigned VecTySize =
DL.getTypeStoreSize(SrcTy);
1749 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1756 if (!Mask.empty() && NumOfDests.
isValid()) {
1774 unsigned E = NumOfDests.
getValue();
1775 unsigned NormalizedVF =
1781 unsigned PrevSrcReg = 0;
1785 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1786 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1791 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1792 PrevRegMask != RegMask)
1795 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
1801 if (SrcReg != DestReg &&
1806 PrevSrcReg = SrcReg;
1807 PrevRegMask = RegMask;
1813 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
1830 if (LT.first == 1 && IsInLaneShuffle && IsSingleElementMask)
1844 if (
const auto *Entry =
1846 if (
auto KindCost = Entry->Cost[
CostKind])
1847 return LT.first * *KindCost;
1881 if (
const auto *Entry =
1883 if (
auto KindCost = Entry->Cost[
CostKind])
1884 return LT.first * *KindCost;
1967 if (ST->hasAVX512())
1968 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1969 if (
auto KindCost = Entry->Cost[
CostKind])
1970 return LT.first * *KindCost;
1986 if (IsInLaneShuffle && ST->hasAVX2())
1987 if (
const auto *Entry =
1989 if (
auto KindCost = Entry->Cost[
CostKind])
1990 return LT.first * *KindCost;
2040 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
2041 if (
auto KindCost = Entry->Cost[
CostKind])
2042 return LT.first * *KindCost;
2064 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2065 if (
auto KindCost = Entry->Cost[
CostKind])
2066 return LT.first * *KindCost;
2093 if (IsInLaneShuffle && ST->hasAVX())
2094 if (
const auto *Entry =
2096 if (
auto KindCost = Entry->Cost[
CostKind])
2097 return LT.first * *KindCost;
2159 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2160 if (
auto KindCost = Entry->Cost[
CostKind])
2161 return LT.first * *KindCost;
2174 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2175 if (
auto KindCost = Entry->Cost[
CostKind])
2176 return LT.first * *KindCost;
2207 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2208 if (
auto KindCost = Entry->Cost[
CostKind])
2209 return LT.first * *KindCost;
2263 if (ST->hasSSE2()) {
2266 if (ST->hasSSE3() && IsLoad)
2267 if (
const auto *Entry =
2270 LT.second.getVectorElementCount()) &&
2271 "Table entry missing from isLegalBroadcastLoad()");
2272 return LT.first * Entry->Cost;
2275 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2276 if (
auto KindCost = Entry->Cost[
CostKind])
2277 return LT.first * *KindCost;
2289 if (ST->hasSSE1()) {
2290 if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
2292 auto MatchSHUFPS = [](
int X,
int Y) {
2293 return X < 0 ||
Y < 0 || ((
X & 4) == (
Y & 4));
2295 if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
2298 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2299 if (
auto KindCost = Entry->Cost[
CostKind])
2300 return LT.first * *KindCost;
2312 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2434 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, { 1, 1, 1, 1 } },
2435 { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, { 3, 1, 1, 1 } },
2436 { ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, { 4, 1, 1, 1 } },
2437 { ISD::FP_EXTEND, MVT::v16f32, MVT::v16f16, { 1, 1, 1, 1 } },
2438 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, { 2, 1, 1, 1 } },
2440 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2462 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2463 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2810 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2823 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, { 3, 1, 1, 1 } },
2887 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
2953 { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, { 1, 1, 1, 1 } },
3111 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3121 { ISD::FP_EXTEND, MVT::f32, MVT::f16, { 1, 1, 1, 1 } },
3122 { ISD::FP_EXTEND, MVT::f64, MVT::f16, { 2, 1, 1, 1 } },
3123 { ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, { 1, 1, 1, 1 } },
3124 { ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, { 1, 1, 1, 1 } },
3125 { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, { 2, 1, 1, 1 } },
3129 EVT SrcTy = TLI->getValueType(
DL, Src);
3130 EVT DstTy = TLI->getValueType(
DL, Dst);
3133 if (SrcTy.isSimple() && DstTy.
isSimple()) {
3134 MVT SimpleSrcTy = SrcTy.getSimpleVT();
3137 if (ST->useAVX512Regs()) {
3140 AVX512BWConversionTbl,
ISD, SimpleDstTy, SimpleSrcTy))
3141 if (
auto KindCost = Entry->Cost[
CostKind])
3146 AVX512DQConversionTbl,
ISD, SimpleDstTy, SimpleSrcTy))
3147 if (
auto KindCost = Entry->Cost[
CostKind])
3150 if (ST->hasAVX512())
3152 AVX512FConversionTbl,
ISD, SimpleDstTy, SimpleSrcTy))
3153 if (
auto KindCost = Entry->Cost[
CostKind])
3159 AVX512BWVLConversionTbl,
ISD, SimpleDstTy, SimpleSrcTy))
3160 if (
auto KindCost = Entry->Cost[
CostKind])
3165 AVX512DQVLConversionTbl,
ISD, SimpleDstTy, SimpleSrcTy))
3166 if (
auto KindCost = Entry->Cost[
CostKind])
3169 if (ST->hasAVX512())
3171 SimpleDstTy, SimpleSrcTy))
3172 if (
auto KindCost = Entry->Cost[
CostKind])
3175 if (ST->hasAVX2()) {
3177 SimpleDstTy, SimpleSrcTy))
3178 if (
auto KindCost = Entry->Cost[
CostKind])
3184 SimpleDstTy, SimpleSrcTy))
3185 if (
auto KindCost = Entry->Cost[
CostKind])
3189 if (ST->hasF16C()) {
3191 SimpleDstTy, SimpleSrcTy))
3192 if (
auto KindCost = Entry->Cost[
CostKind])
3196 if (ST->hasSSE41()) {
3198 SimpleDstTy, SimpleSrcTy))
3199 if (
auto KindCost = Entry->Cost[
CostKind])
3203 if (ST->hasSSE2()) {
3205 SimpleDstTy, SimpleSrcTy))
3206 if (
auto KindCost = Entry->Cost[
CostKind])
3211 (
ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) {
3226 if (ST->useAVX512Regs()) {
3229 AVX512BWConversionTbl,
ISD, LTDest.second, LTSrc.second))
3230 if (
auto KindCost = Entry->Cost[
CostKind])
3231 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3235 AVX512DQConversionTbl,
ISD, LTDest.second, LTSrc.second))
3236 if (
auto KindCost = Entry->Cost[
CostKind])
3237 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3239 if (ST->hasAVX512())
3241 AVX512FConversionTbl,
ISD, LTDest.second, LTSrc.second))
3242 if (
auto KindCost = Entry->Cost[
CostKind])
3243 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3248 LTDest.second, LTSrc.second))
3249 if (
auto KindCost = Entry->Cost[
CostKind])
3250 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3254 LTDest.second, LTSrc.second))
3255 if (
auto KindCost = Entry->Cost[
CostKind])
3256 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3258 if (ST->hasAVX512())
3260 LTDest.second, LTSrc.second))
3261 if (
auto KindCost = Entry->Cost[
CostKind])
3262 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3266 LTDest.second, LTSrc.second))
3267 if (
auto KindCost = Entry->Cost[
CostKind])
3268 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3272 LTDest.second, LTSrc.second))
3273 if (
auto KindCost = Entry->Cost[
CostKind])
3274 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3276 if (ST->hasF16C()) {
3278 LTDest.second, LTSrc.second))
3279 if (
auto KindCost = Entry->Cost[
CostKind])
3280 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3285 LTDest.second, LTSrc.second))
3286 if (
auto KindCost = Entry->Cost[
CostKind])
3287 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3291 LTDest.second, LTSrc.second))
3292 if (
auto KindCost = Entry->Cost[
CostKind])
3293 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3298 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3315 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3326 return Cost == 0 ? 0 :
N;
3647 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3648 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3649 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3650 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3651 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3652 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3653 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3654 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3655 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3656 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3657 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3658 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3659 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3660 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3661 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3683 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3684 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3685 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3686 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3687 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3688 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3689 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3690 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3691 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3692 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3693 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3694 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3696 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3697 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3698 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3699 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3700 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3701 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3704 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3705 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3727 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3728 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3729 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3730 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3731 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3732 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3733 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3734 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3735 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3736 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3737 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3738 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3739 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3743 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3744 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3745 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3746 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3747 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3748 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3749 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3750 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3751 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3752 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3753 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3754 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3755 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3756 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3757 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3758 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3759 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3760 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3769 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3770 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3771 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3772 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3774 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3776 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3781 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3782 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3783 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3784 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3789 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3790 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3791 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3792 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3793 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3794 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3795 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3796 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3797 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3805 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3806 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3807 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3808 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3809 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3810 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3811 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3812 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3813 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3814 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3815 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3816 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3817 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3818 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3819 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3820 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3821 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3822 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3823 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3824 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3825 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3826 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3827 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3828 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3843 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3844 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3845 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3846 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3847 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3848 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3849 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3850 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3851 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3852 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3853 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3854 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3855 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3856 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3857 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3858 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3867 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3868 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3869 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3870 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3871 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3872 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3873 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3874 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3875 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3876 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3877 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3878 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3879 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3880 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3881 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3882 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3898 { ISD::FMAXNUM, MVT::f32, { 2, 2, 3, 3 } },
3899 { ISD::FMAXNUM, MVT::v4f32, { 1, 1, 3, 3 } },
3900 { ISD::FMAXNUM, MVT::v8f32, { 2, 2, 3, 3 } },
3901 { ISD::FMAXNUM, MVT::v16f32, { 4, 4, 3, 3 } },
3902 { ISD::FMAXNUM, MVT::f64, { 2, 2, 3, 3 } },
3903 { ISD::FMAXNUM, MVT::v2f64, { 1, 1, 3, 3 } },
3904 { ISD::FMAXNUM, MVT::v4f64, { 2, 2, 3, 3 } },
3905 { ISD::FMAXNUM, MVT::v8f64, { 3, 3, 3, 3 } },
3906 { ISD::FSQRT, MVT::f32, { 3, 12, 1, 1 } },
3907 { ISD::FSQRT, MVT::v4f32, { 3, 12, 1, 1 } },
3908 { ISD::FSQRT, MVT::v8f32, { 6, 12, 1, 1 } },
3909 { ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3910 { ISD::FSQRT, MVT::f64, { 6, 18, 1, 1 } },
3911 { ISD::FSQRT, MVT::v2f64, { 6, 18, 1, 1 } },
3912 { ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3913 { ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3929 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3930 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3931 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3932 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3933 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3934 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3935 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3936 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3937 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3938 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3939 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3940 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3941 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3942 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3943 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3944 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3955 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3956 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3957 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3958 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3959 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3960 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3961 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3962 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3977 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3978 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3979 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3980 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3981 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3982 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3983 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3984 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3985 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3986 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3987 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3988 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3989 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3990 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3993 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3994 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3995 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3996 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3997 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3998 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3999 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
4000 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
4007 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
4008 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
4009 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
4010 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
4011 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
4012 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
4013 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
4014 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
4015 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
4016 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
4017 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4018 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
4019 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
4020 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
4021 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
4023 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
4024 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
4036 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
4037 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
4038 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
4039 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
4040 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
4041 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
4042 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
4043 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
4044 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
4045 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
4046 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
4047 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
4048 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
4049 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
4050 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
4052 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
4053 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
4059 { ISD::FMAXNUM, MVT::f32, { 2, 7, 3, 5 } },
4060 { ISD::FMAXNUM, MVT::v4f32, { 2, 7, 3, 5 } },
4061 { ISD::FMAXNUM, MVT::v8f32, { 3, 7, 3, 6 } },
4062 { ISD::FMAXNUM, MVT::f64, { 2, 7, 3, 5 } },
4063 { ISD::FMAXNUM, MVT::v2f64, { 2, 7, 3, 5 } },
4064 { ISD::FMAXNUM, MVT::v4f64, { 3, 7, 3, 6 } },
4065 { ISD::FSQRT, MVT::f32, { 7, 15, 1, 1 } },
4066 { ISD::FSQRT, MVT::v4f32, { 7, 15, 1, 1 } },
4067 { ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
4068 { ISD::FSQRT, MVT::f64, { 14, 21, 1, 1 } },
4069 { ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4070 { ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4073 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4074 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4075 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4076 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4089 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4091 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4092 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4093 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4094 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4095 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4096 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4097 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4098 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4099 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4100 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4101 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4102 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4103 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4104 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4105 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4106 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4107 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4108 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4109 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4110 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4111 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4112 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4113 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4114 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4120 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4121 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4122 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4123 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4124 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4125 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4126 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4127 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4128 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4129 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4130 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4131 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4132 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4133 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4134 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4136 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4137 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4148 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4149 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4150 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4151 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4152 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4153 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4154 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4155 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4156 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4157 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4158 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4159 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4160 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4161 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4162 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4164 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4165 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4172 { ISD::FMAXNUM, MVT::f32, { 3, 6, 3, 5 } },
4173 { ISD::FMAXNUM, MVT::v4f32, { 3, 6, 3, 5 } },
4174 { ISD::FMAXNUM, MVT::v8f32, { 5, 7, 3, 10 } },
4175 { ISD::FMAXNUM, MVT::f64, { 3, 6, 3, 5 } },
4176 { ISD::FMAXNUM, MVT::v2f64, { 3, 6, 3, 5 } },
4177 { ISD::FMAXNUM, MVT::v4f64, { 5, 7, 3, 10 } },
4178 { ISD::FSQRT, MVT::f32, { 21, 21, 1, 1 } },
4179 { ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4180 { ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4181 { ISD::FSQRT, MVT::f64, { 27, 27, 1, 1 } },
4182 { ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4183 { ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4207 { ISD::FSQRT, MVT::f32, { 19, 20, 1, 1 } },
4208 { ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4209 { ISD::FSQRT, MVT::f64, { 34, 35, 1, 1 } },
4210 { ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4216 { ISD::FSQRT, MVT::f32, { 20, 20, 1, 1 } },
4217 { ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4218 { ISD::FSQRT, MVT::f64, { 35, 35, 1, 1 } },
4219 { ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4222 { ISD::FMAXNUM, MVT::f32, { 5, 5, 7, 7 } },
4223 { ISD::FMAXNUM, MVT::v4f32, { 4, 4, 4, 5 } },
4224 { ISD::FMAXNUM, MVT::f64, { 5, 5, 7, 7 } },
4225 { ISD::FMAXNUM, MVT::v2f64, { 4, 4, 4, 5 } },
4226 { ISD::FSQRT, MVT::f32, { 18, 18, 1, 1 } },
4227 { ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4230 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4235 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4236 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4237 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4238 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4239 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4240 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4241 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4242 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4244 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4249 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4250 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4251 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4252 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4253 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4254 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4255 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4256 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4258 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4261 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4262 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4263 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4271 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4272 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4273 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4274 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4275 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4276 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4277 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4278 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4279 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4280 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4281 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4282 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4285 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4286 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4287 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4288 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4293 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4296 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4297 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4298 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4299 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4300 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4301 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4302 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4303 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4304 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4305 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4306 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4307 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4312 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4313 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4314 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4315 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4316 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4317 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4318 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4319 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4320 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4321 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4323 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4332 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4333 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4334 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4335 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4336 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4337 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4338 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4339 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4340 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4341 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4343 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4348 { ISD::FMAXNUM, MVT::f64, { 5, 5, 7, 7 } },
4349 { ISD::FMAXNUM, MVT::v2f64, { 4, 6, 6, 6 } },
4350 { ISD::FSQRT, MVT::f64, { 32, 32, 1, 1 } },
4351 { ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4354 { ISD::FMAXNUM, MVT::f32, { 5, 5, 7, 7 } },
4355 { ISD::FMAXNUM, MVT::v4f32, { 4, 6, 6, 6 } },
4356 { ISD::FSQRT, MVT::f32, { 28, 30, 1, 2 } },
4357 { ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4360 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4363 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4364 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4368 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4371 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4372 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4384 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4387 {
ISD::CTLZ, MVT::i64, { 1, 2, 3, 3 } },
4388 {
ISD::CTLZ, MVT::i32, { 1, 2, 3, 3 } },
4389 {
ISD::CTLZ, MVT::i16, { 2, 2, 3, 3 } },
4392 {
ISD::CTTZ, MVT::i64, { 1, 2, 2, 2 } },
4393 {
ISD::CTTZ, MVT::i32, { 1, 2, 2, 2 } },
4394 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 2 } },
4398 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4399 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4401 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4406 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4407 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4408 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4409 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4416 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4417 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4418 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4424 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4425 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4430 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4431 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4439 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4440 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4442 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4443 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4448 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4449 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4463 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4464 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4466 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4467 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4469 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4470 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4472 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4473 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4496 case Intrinsic::abs:
4499 case Intrinsic::bitreverse:
4502 case Intrinsic::bswap:
4505 case Intrinsic::ctlz:
4508 case Intrinsic::ctpop:
4511 case Intrinsic::cttz:
4514 case Intrinsic::fshl:
4518 if (Args[0] == Args[1]) {
4529 case Intrinsic::fshr:
4534 if (Args[0] == Args[1]) {
4545 case Intrinsic::lrint:
4546 case Intrinsic::llrint: {
4553 case Intrinsic::maxnum:
4554 case Intrinsic::minnum:
4558 case Intrinsic::sadd_sat:
4561 case Intrinsic::smax:
4564 case Intrinsic::smin:
4567 case Intrinsic::ssub_sat:
4570 case Intrinsic::uadd_sat:
4573 case Intrinsic::umax:
4576 case Intrinsic::umin:
4579 case Intrinsic::usub_sat:
4582 case Intrinsic::sqrt:
4585 case Intrinsic::sadd_with_overflow:
4586 case Intrinsic::ssub_with_overflow:
4591 case Intrinsic::uadd_with_overflow:
4592 case Intrinsic::usub_with_overflow:
4597 case Intrinsic::smul_with_overflow:
4601 case Intrinsic::umul_with_overflow:
4608 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4609 std::pair<InstructionCost, MVT> LT,
4612 MVT MTy = LT.second;
4617 if (
ISD == ISD::FMAXNUM ||
ISD == ISD::FMINNUM) {
4619 return LegalizationCost * 1;
4624 if (
ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4629 if (LI->hasOneUse())
4636 return LegalizationCost * (int)
Cost;
4641 MVT MTy = LT.second;
4649 if (Cst->isAllOnesValue())
4657 if (ST->useGLMDivSqrtCosts())
4659 if (
auto KindCost = Entry->Cost[
CostKind])
4660 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4662 if (ST->useSLMArithCosts())
4664 if (
auto KindCost = Entry->Cost[
CostKind])
4665 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4669 if (
auto KindCost = Entry->Cost[
CostKind])
4670 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4672 if (ST->hasBITALG())
4674 if (
auto KindCost = Entry->Cost[
CostKind])
4675 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4677 if (ST->hasVPOPCNTDQ())
4679 if (
auto KindCost = Entry->Cost[
CostKind])
4680 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4684 if (
auto KindCost = Entry->Cost[
CostKind])
4685 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4689 if (
auto KindCost = Entry->Cost[
CostKind])
4690 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4694 if (
auto KindCost = Entry->Cost[
CostKind])
4695 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4697 if (ST->hasAVX512())
4699 if (
auto KindCost = Entry->Cost[
CostKind])
4700 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4704 if (
auto KindCost = Entry->Cost[
CostKind])
4705 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4709 if (
auto KindCost = Entry->Cost[
CostKind])
4710 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4714 if (
auto KindCost = Entry->Cost[
CostKind])
4715 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4719 if (
auto KindCost = Entry->Cost[
CostKind])
4720 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4724 if (
auto KindCost = Entry->Cost[
CostKind])
4725 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4729 if (
auto KindCost = Entry->Cost[
CostKind])
4730 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4734 if (
auto KindCost = Entry->Cost[
CostKind])
4735 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4739 if (
auto KindCost = Entry->Cost[
CostKind])
4740 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4745 if (
auto KindCost = Entry->Cost[
CostKind])
4746 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4749 if (
auto KindCost = Entry->Cost[
CostKind])
4750 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4753 if (ST->hasLZCNT()) {
4756 if (
auto KindCost = Entry->Cost[
CostKind])
4757 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4760 if (
auto KindCost = Entry->Cost[
CostKind])
4761 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4764 if (ST->hasPOPCNT()) {
4767 if (
auto KindCost = Entry->Cost[
CostKind])
4768 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4771 if (
auto KindCost = Entry->Cost[
CostKind])
4772 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4777 if (
auto KindCost = Entry->Cost[
CostKind])
4778 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4781 if (
auto KindCost = Entry->Cost[
CostKind])
4782 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4787 (IID == Intrinsic::fshl || IID == Intrinsic::fshr)) {