40#include "llvm/Config/llvm-config.h"
51#define DEBUG_TYPE "x86-codegen"
53STATISTIC(NumFXCH,
"Number of fxch instructions inserted");
54STATISTIC(NumFP ,
"Number of floating point instructions");
57 const unsigned ScratchFPReg = 7;
64 memset(Stack, 0,
sizeof(Stack));
65 memset(RegMap, 0,
sizeof(RegMap));
104 unsigned FixCount = 0;
108 unsigned char FixStack[8];
110 LiveBundle() =
default;
113 bool isFixed()
const {
return !
Mask || FixCount; }
129 static_assert(X86::FP6 - X86::FP0 == 6,
"sequential regnums");
130 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
152 unsigned StackTop = 0;
162 unsigned RegMap[NumFPRegs];
165 void setupBlockStack();
168 void finishBlockStack();
170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
171 void dumpStack()
const {
172 dbgs() <<
"Stack contents:";
173 for (
unsigned i = 0; i != StackTop; ++i) {
175 assert(RegMap[Stack[i]] == i &&
"Stack[] doesn't match RegMap[]!");
182 unsigned getSlot(
unsigned RegNo)
const {
183 assert(RegNo < NumFPRegs &&
"Regno out of range!");
184 return RegMap[RegNo];
188 bool isLive(
unsigned RegNo)
const {
189 unsigned Slot = getSlot(RegNo);
194 unsigned getStackEntry(
unsigned STi)
const {
197 return Stack[StackTop-1-STi];
202 unsigned getSTReg(
unsigned RegNo)
const {
203 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
207 void pushReg(
unsigned Reg) {
208 assert(Reg < NumFPRegs &&
"Register number out of range!");
212 RegMap[
Reg] = StackTop++;
219 RegMap[
Stack[--StackTop]] = ~0;
222 bool isAtTop(
unsigned RegNo)
const {
return getSlot(RegNo) == StackTop-1; }
225 if (isAtTop(RegNo))
return;
227 unsigned STReg = getSTReg(RegNo);
228 unsigned RegOnTop = getStackEntry(0);
231 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
234 if (RegMap[RegOnTop] >= StackTop)
236 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
243 void duplicateToTop(
unsigned RegNo,
unsigned AsReg,
246 unsigned STReg = getSTReg(RegNo);
273 void shuffleStackTop(
const unsigned char *FixStack,
unsigned FixCount,
293 return X86::RFP80RegClass.contains(DstReg) ||
294 X86::RFP80RegClass.contains(SrcReg);
316 assert(Reg >= X86::FP0 && Reg <= X86::FP6 &&
"Expected FP register!");
317 return Reg - X86::FP0;
326 bool FPIsUsed =
false;
328 static_assert(X86::FP6 == X86::FP0+6,
"Register enums aren't sorted right!");
330 for (
unsigned i = 0; i <= 6; ++i)
331 if (!
MRI.reg_nodbg_empty(X86::FP0 + i)) {
337 if (!FPIsUsed)
return false;
339 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
343 bundleCFGRecomputeKillFlags(MF);
357 if ((
Entry->getParent()->getFunction().getCallingConv() ==
365 assert((Bundle.Mask & 0xFE) == 0 &&
366 "Only FP0 could be passed as an argument");
368 Bundle.FixStack[0] = 0;
371 bool Changed =
false;
373 Changed |= processBasicBlock(MF, *BB);
378 if (Processed.
insert(&BB).second)
379 Changed |= processBasicBlock(MF, BB);
392 assert(LiveBundles.
empty() &&
"Stale data in LiveBundles");
399 const unsigned Mask = calcLiveInMask(&
MBB,
false);
411 bool Changed =
false;
421 if (
MI.isInlineAsm())
424 if (
MI.isCopy() && isFPCopy(
MI))
427 if (
MI.isImplicitDef() &&
428 X86::RFP80RegClass.contains(
MI.getOperand(0).getReg()))
442 if (
MI.isFakeUse()) {
444 if (MO.
isReg() && X86::RFP80RegClass.contains(MO.
getReg())) {
457 PrevMI = &*std::prev(
I);
469 switch (FPInstClass) {
485 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
486 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
487 LLVM_DEBUG(
dbgs() <<
"Register FP#" << Reg - X86::FP0 <<
" is dead!\n");
488 freeStackSlotAfter(
I, Reg-X86::FP0);
496 dbgs() <<
"Just deleted pseudo instruction\n";
500 while (Start != BB.
begin() && std::prev(Start) != PrevI)
502 dbgs() <<
"Inserted instructions:\n\t";
503 Start->print(
dbgs());
504 while (++Start != std::next(
I)) {
521void FPS::setupBlockStack() {
523 <<
" derived from " <<
MBB->
getName() <<
".\n");
526 const LiveBundle &Bundle =
535 assert(Bundle.isFixed() &&
"Reached block before any predecessors");
538 for (
unsigned i = Bundle.FixCount; i > 0; --i) {
540 <<
unsigned(Bundle.FixStack[i - 1]) <<
'\n');
541 pushReg(Bundle.FixStack[i-1]);
547 unsigned Mask = calcLiveInMask(
MBB,
true);
556void FPS::finishBlockStack() {
562 <<
" derived from " <<
MBB->
getName() <<
".\n");
566 LiveBundle &Bundle = LiveBundles[BundleIdx];
571 adjustLiveRegs(Bundle.Mask, Term);
580 if (Bundle.isFixed()) {
582 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
586 Bundle.FixCount = StackTop;
587 for (
unsigned i = 0; i < StackTop; ++i)
588 Bundle.FixStack[i] = getStackEntry(i);
614 if (
I != Table.
end() &&
I->from == Opcode)
620#define ASSERT_SORTED(TABLE)
622#define ASSERT_SORTED(TABLE) \
624 static std::atomic<bool> TABLE##Checked(false); \
625 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
626 assert(is_sorted(TABLE) && \
627 "All lookup tables must be sorted for efficient access!"); \
628 TABLE##Checked.store(true, std::memory_order_relaxed); \
642 { X86::ABS_Fp32 , X86::ABS_F },
643 { X86::ABS_Fp64 , X86::ABS_F },
644 { X86::ABS_Fp80 , X86::ABS_F },
645 { X86::ADD_Fp32m , X86::ADD_F32m },
646 { X86::ADD_Fp64m , X86::ADD_F64m },
647 { X86::ADD_Fp64m32 , X86::ADD_F32m },
648 { X86::ADD_Fp80m32 , X86::ADD_F32m },
649 { X86::ADD_Fp80m64 , X86::ADD_F64m },
650 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
651 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
652 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
653 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
654 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
655 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
656 { X86::CHS_Fp32 , X86::CHS_F },
657 { X86::CHS_Fp64 , X86::CHS_F },
658 { X86::CHS_Fp80 , X86::CHS_F },
659 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
660 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
661 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
662 { X86::CMOVB_Fp32 , X86::CMOVB_F },
663 { X86::CMOVB_Fp64 , X86::CMOVB_F },
664 { X86::CMOVB_Fp80 , X86::CMOVB_F },
665 { X86::CMOVE_Fp32 , X86::CMOVE_F },
666 { X86::CMOVE_Fp64 , X86::CMOVE_F },
667 { X86::CMOVE_Fp80 , X86::CMOVE_F },
668 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
669 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
670 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
671 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
672 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
673 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
674 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
675 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
676 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
677 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
678 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
679 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
680 { X86::CMOVP_Fp32 , X86::CMOVP_F },
681 { X86::CMOVP_Fp64 , X86::CMOVP_F },
682 { X86::CMOVP_Fp80 , X86::CMOVP_F },
683 { X86::COM_FpIr32 , X86::COM_FIr },
684 { X86::COM_FpIr64 , X86::COM_FIr },
685 { X86::COM_FpIr80 , X86::COM_FIr },
686 { X86::COM_Fpr32 , X86::COM_FST0r },
687 { X86::COM_Fpr64 , X86::COM_FST0r },
688 { X86::COM_Fpr80 , X86::COM_FST0r },
689 { X86::DIVR_Fp32m , X86::DIVR_F32m },
690 { X86::DIVR_Fp64m , X86::DIVR_F64m },
691 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
692 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
693 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
694 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
695 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
696 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
697 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
698 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
699 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
700 { X86::DIV_Fp32m , X86::DIV_F32m },
701 { X86::DIV_Fp64m , X86::DIV_F64m },
702 { X86::DIV_Fp64m32 , X86::DIV_F32m },
703 { X86::DIV_Fp80m32 , X86::DIV_F32m },
704 { X86::DIV_Fp80m64 , X86::DIV_F64m },
705 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
706 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
707 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
708 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
709 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
710 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
711 { X86::ILD_Fp16m32 , X86::ILD_F16m },
712 { X86::ILD_Fp16m64 , X86::ILD_F16m },
713 { X86::ILD_Fp16m80 , X86::ILD_F16m },
714 { X86::ILD_Fp32m32 , X86::ILD_F32m },
715 { X86::ILD_Fp32m64 , X86::ILD_F32m },
716 { X86::ILD_Fp32m80 , X86::ILD_F32m },
717 { X86::ILD_Fp64m32 , X86::ILD_F64m },
718 { X86::ILD_Fp64m64 , X86::ILD_F64m },
719 { X86::ILD_Fp64m80 , X86::ILD_F64m },
720 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
721 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
722 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
723 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
724 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
725 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
726 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
727 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
728 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
729 { X86::IST_Fp16m32 , X86::IST_F16m },
730 { X86::IST_Fp16m64 , X86::IST_F16m },
731 { X86::IST_Fp16m80 , X86::IST_F16m },
732 { X86::IST_Fp32m32 , X86::IST_F32m },
733 { X86::IST_Fp32m64 , X86::IST_F32m },
734 { X86::IST_Fp32m80 , X86::IST_F32m },
735 { X86::IST_Fp64m32 , X86::IST_FP64m },
736 { X86::IST_Fp64m64 , X86::IST_FP64m },
737 { X86::IST_Fp64m80 , X86::IST_FP64m },
738 { X86::LD_Fp032 , X86::LD_F0 },
739 { X86::LD_Fp064 , X86::LD_F0 },
740 { X86::LD_Fp080 , X86::LD_F0 },
741 { X86::LD_Fp132 , X86::LD_F1 },
742 { X86::LD_Fp164 , X86::LD_F1 },
743 { X86::LD_Fp180 , X86::LD_F1 },
744 { X86::LD_Fp32m , X86::LD_F32m },
745 { X86::LD_Fp32m64 , X86::LD_F32m },
746 { X86::LD_Fp32m80 , X86::LD_F32m },
747 { X86::LD_Fp64m , X86::LD_F64m },
748 { X86::LD_Fp64m80 , X86::LD_F64m },
749 { X86::LD_Fp80m , X86::LD_F80m },
750 { X86::MUL_Fp32m , X86::MUL_F32m },
751 { X86::MUL_Fp64m , X86::MUL_F64m },
752 { X86::MUL_Fp64m32 , X86::MUL_F32m },
753 { X86::MUL_Fp80m32 , X86::MUL_F32m },
754 { X86::MUL_Fp80m64 , X86::MUL_F64m },
755 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
756 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
757 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
758 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
759 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
760 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
761 { X86::SQRT_Fp32 , X86::SQRT_F },
762 { X86::SQRT_Fp64 , X86::SQRT_F },
763 { X86::SQRT_Fp80 , X86::SQRT_F },
764 { X86::ST_Fp32m , X86::ST_F32m },
765 { X86::ST_Fp64m , X86::ST_F64m },
766 { X86::ST_Fp64m32 , X86::ST_F32m },
767 { X86::ST_Fp80m32 , X86::ST_F32m },
768 { X86::ST_Fp80m64 , X86::ST_F64m },
769 { X86::ST_FpP80m , X86::ST_FP80m },
770 { X86::SUBR_Fp32m , X86::SUBR_F32m },
771 { X86::SUBR_Fp64m , X86::SUBR_F64m },
772 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
773 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
774 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
775 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
776 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
777 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
778 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
779 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
780 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
781 { X86::SUB_Fp32m , X86::SUB_F32m },
782 { X86::SUB_Fp64m , X86::SUB_F64m },
783 { X86::SUB_Fp64m32 , X86::SUB_F32m },
784 { X86::SUB_Fp80m32 , X86::SUB_F32m },
785 { X86::SUB_Fp80m64 , X86::SUB_F64m },
786 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
787 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
788 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
789 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
790 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
791 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
792 { X86::TST_Fp32 , X86::TST_F },
793 { X86::TST_Fp64 , X86::TST_F },
794 { X86::TST_Fp80 , X86::TST_F },
795 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
796 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
797 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
798 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
799 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
800 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
801 { X86::XAM_Fp32 , X86::XAM_F },
802 { X86::XAM_Fp64 , X86::XAM_F },
803 { X86::XAM_Fp80 , X86::XAM_F },
809 assert(
Opc != -1 &&
"FP Stack instruction not in OpcodeTable!");
821 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
823 { X86::COMP_FST0r, X86::FCOMPP },
824 { X86::COM_FIr , X86::COM_FIPr },
825 { X86::COM_FST0r , X86::COMP_FST0r },
827 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
828 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
830 { X86::IST_F16m , X86::IST_FP16m },
831 { X86::IST_F32m , X86::IST_FP32m },
833 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
835 { X86::ST_F32m , X86::ST_FP32m },
836 { X86::ST_F64m , X86::ST_FP64m },
837 { X86::ST_Frr , X86::ST_FPrr },
839 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
840 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
842 { X86::UCOM_FIr , X86::UCOM_FIPr },
844 { X86::UCOM_FPr , X86::UCOM_FPPr },
845 { X86::UCOM_Fr , X86::UCOM_FPr },
850 MI.findRegisterDefOperand(X86::FPSW,
nullptr))
883 I->setDesc(
TII->get(Opcode));
884 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
886 MI.dropDebugNumber();
893 if (Next !=
MBB.
end() && Next->readsRegister(X86::FPSW,
nullptr))
905 if (getStackEntry(0) == FPRegNo) {
913 I = freeStackSlotBefore(++
I, FPRegNo);
920 unsigned STReg = getSTReg(FPRegNo);
921 unsigned OldSlot = getSlot(FPRegNo);
922 unsigned TopReg =
Stack[StackTop-1];
923 Stack[OldSlot] = TopReg;
924 RegMap[TopReg] = OldSlot;
925 RegMap[FPRegNo] = ~0;
926 Stack[--StackTop] = ~0;
935 unsigned Defs =
Mask;
937 for (
unsigned i = 0; i < StackTop; ++i) {
938 unsigned RegNo =
Stack[i];
939 if (!(Defs & (1 << RegNo)))
941 Kills |= (1 << RegNo);
944 Defs &= ~(1 << RegNo);
946 assert((Kills & Defs) == 0 &&
"Register needs killing and def'ing?");
949 while (Kills && Defs) {
952 LLVM_DEBUG(
dbgs() <<
"Renaming %fp" << KReg <<
" as imp %fp" << DReg
954 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
956 Kills &= ~(1 << KReg);
957 Defs &= ~(1 <<
DReg);
964 unsigned KReg = getStackEntry(0);
965 if (!(Kills & (1 << KReg)))
969 Kills &= ~(1 << KReg);
977 freeStackSlotBefore(
I, KReg);
978 Kills &= ~(1 << KReg);
987 Defs &= ~(1 <<
DReg);
998void FPS::shuffleStackTop(
const unsigned char *FixStack,
1002 while (FixCount--) {
1004 unsigned OldReg = getStackEntry(FixCount);
1006 unsigned Reg = FixStack[FixCount];
1012 moveToTop(OldReg,
I);
1024 unsigned STReturns = 0;
1026 bool ClobbersFPStack =
false;
1027 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1031 if (
Op.isRegMask()) {
1032 bool ClobbersFP0 =
Op.clobbersPhysReg(X86::FP0);
1034 static_assert(X86::FP7 - X86::FP0 == 7,
"sequential FP regnumbers");
1035 for (
unsigned i = 1; i != 8; ++i)
1036 assert(
Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1037 "Inconsistent FP register clobber");
1041 ClobbersFPStack =
true;
1044 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1047 assert(
Op.isImplicit() &&
"Expected implicit def/use");
1053 MI.removeOperand(i);
1061 assert((ClobbersFPStack || STReturns == 0) &&
1062 "ST returns without FP stack clobber");
1063 if (!ClobbersFPStack)
1075 while (StackTop > 0)
1078 for (
unsigned I = 0;
I <
N; ++
I)
1084 I->dropDebugNumber();
1093 unsigned FirstFPRegOp = ~0
U, SecondFPRegOp = ~0
U;
1094 unsigned LiveMask = 0;
1096 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
1098 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1105 MI.killsRegister(
Op.getReg(),
1107 "Ret only defs operands, and values aren't live beyond it");
1109 if (FirstFPRegOp == ~0U)
1112 assert(SecondFPRegOp == ~0U &&
"More than two fp operands!");
1118 MI.removeOperand(i);
1125 adjustLiveRegs(LiveMask,
MI);
1126 if (!LiveMask)
return;
1132 if (SecondFPRegOp == ~0U) {
1134 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1135 "Top of stack not the right register for RET!");
1147 if (StackTop == 1) {
1148 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1149 "Stack misconfiguration for RET!");
1153 unsigned NewReg = ScratchFPReg;
1154 duplicateToTop(FirstFPRegOp, NewReg,
MI);
1155 FirstFPRegOp = NewReg;
1159 assert(StackTop == 2 &&
"Must have two values live!");
1163 if (getStackEntry(0) == SecondFPRegOp) {
1164 assert(getStackEntry(1) == FirstFPRegOp &&
"Unknown regs live");
1165 moveToTop(FirstFPRegOp,
MI);
1170 assert(getStackEntry(0) == FirstFPRegOp &&
"Unknown regs live");
1171 assert(getStackEntry(1) == SecondFPRegOp &&
"Unknown regs live");
1179 unsigned DestReg =
getFPReg(
MI.getOperand(0));
1182 MI.removeOperand(0);
1190 MI.dropDebugNumber();
1197 unsigned NumOps =
MI.getDesc().getNumOperands();
1199 "Can only handle fst* & ftst instructions!");
1203 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1211 if (!KillsSrc && (
MI.getOpcode() == X86::IST_Fp64m32 ||
1212 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1213 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1214 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1215 MI.getOpcode() == X86::IST_Fp64m64 ||
1216 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1217 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1218 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1219 MI.getOpcode() == X86::IST_Fp64m80 ||
1220 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1221 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1222 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1223 MI.getOpcode() == X86::ST_FpP80m)) {
1224 duplicateToTop(Reg, ScratchFPReg,
I);
1230 MI.removeOperand(NumOps - 1);
1235 if (
MI.getOpcode() == X86::IST_FP64m ||
MI.getOpcode() == X86::ISTT_FP16m ||
1236 MI.getOpcode() == X86::ISTT_FP32m ||
MI.getOpcode() == X86::ISTT_FP64m ||
1237 MI.getOpcode() == X86::ST_FP80m) {
1241 }
else if (KillsSrc) {
1245 MI.dropDebugNumber();
1260 unsigned NumOps =
MI.getDesc().getNumOperands();
1261 assert(NumOps >= 2 &&
"FPRW instructions must have 2 ops!!");
1266 bool KillsSrc =
MI.killsRegister(X86::FP0 + Reg,
nullptr);
1279 duplicateToTop(Reg,
getFPReg(
MI.getOperand(0)),
I);
1283 MI.removeOperand(1);
1284 MI.removeOperand(0);
1286 MI.dropDebugNumber();
1296 { X86::ADD_Fp32 , X86::ADD_FST0r },
1297 { X86::ADD_Fp64 , X86::ADD_FST0r },
1298 { X86::ADD_Fp80 , X86::ADD_FST0r },
1299 { X86::DIV_Fp32 , X86::DIV_FST0r },
1300 { X86::DIV_Fp64 , X86::DIV_FST0r },
1301 { X86::DIV_Fp80 , X86::DIV_FST0r },
1302 { X86::MUL_Fp32 , X86::MUL_FST0r },
1303 { X86::MUL_Fp64 , X86::MUL_FST0r },
1304 { X86::MUL_Fp80 , X86::MUL_FST0r },
1305 { X86::SUB_Fp32 , X86::SUB_FST0r },
1306 { X86::SUB_Fp64 , X86::SUB_FST0r },
1307 { X86::SUB_Fp80 , X86::SUB_FST0r },
1312 { X86::ADD_Fp32 , X86::ADD_FST0r },
1313 { X86::ADD_Fp64 , X86::ADD_FST0r },
1314 { X86::ADD_Fp80 , X86::ADD_FST0r },
1315 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1316 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1317 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1318 { X86::MUL_Fp32 , X86::MUL_FST0r },
1319 { X86::MUL_Fp64 , X86::MUL_FST0r },
1320 { X86::MUL_Fp80 , X86::MUL_FST0r },
1321 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1322 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1323 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1328 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1329 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1330 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1331 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1332 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1333 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1334 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1335 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1336 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1337 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1338 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1339 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1344 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1345 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1346 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1347 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1348 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1349 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1350 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1351 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1352 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1353 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1354 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1355 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1372 unsigned NumOperands =
MI.getDesc().getNumOperands();
1373 assert(NumOperands == 3 &&
"Illegal TwoArgFP instruction!");
1375 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1376 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1377 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1378 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1381 unsigned TOS = getStackEntry(0);
1385 if (Op0 != TOS && Op1 != TOS) {
1392 }
else if (KillsOp1) {
1401 duplicateToTop(Op0, Dest,
I);
1405 }
else if (!KillsOp0 && !KillsOp1) {
1409 duplicateToTop(Op0, Dest,
I);
1416 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1417 "Stack conditions not set up right!");
1422 bool isForward = TOS == Op0;
1423 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1436 int Opcode =
Lookup(InstTable,
MI.getOpcode());
1437 assert(Opcode != -1 &&
"Unknown TwoArgFP pseudo instruction!");
1440 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1446 if (!
MI.mayRaiseFPException())
1447 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1451 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1452 assert(!updateST0 &&
"Should have updated other operand!");
1458 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1459 assert(UpdatedSlot < StackTop && Dest < 7);
1460 Stack[UpdatedSlot] = Dest;
1461 RegMap[Dest] = UpdatedSlot;
1471 unsigned NumOperands =
MI.getDesc().getNumOperands();
1472 assert(NumOperands == 2 &&
"Illegal FUCOM* instruction!");
1473 unsigned Op0 =
getFPReg(
MI.getOperand(NumOperands - 2));
1474 unsigned Op1 =
getFPReg(
MI.getOperand(NumOperands - 1));
1475 bool KillsOp0 =
MI.killsRegister(X86::FP0 + Op0,
nullptr);
1476 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1483 MI.getOperand(0).setReg(getSTReg(Op1));
1484 MI.removeOperand(1);
1486 MI.dropDebugNumber();
1489 if (KillsOp0) freeStackSlotAfter(
I, Op0);
1490 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(
I, Op1);
1502 bool KillsOp1 =
MI.killsRegister(X86::FP0 + Op1,
nullptr);
1509 MI.removeOperand(0);
1510 MI.removeOperand(1);
1511 MI.getOperand(0).setReg(getSTReg(Op1));
1513 MI.dropDebugNumber();
1516 if (Op0 != Op1 && KillsOp1) {
1518 freeStackSlotAfter(
I, Op1);
1535 if (
MI.isReturn()) {
1540 switch (
MI.getOpcode()) {
1542 case TargetOpcode::COPY: {
1546 bool KillsSrc =
MI.killsRegister(MO1.
getReg(),
nullptr);
1551 assert(isLive(SrcFP) &&
"Cannot copy dead register");
1555 unsigned Slot = getSlot(SrcFP);
1557 RegMap[DstFP] =
Slot;
1561 duplicateToTop(SrcFP, DstFP, Inst);
1566 case TargetOpcode::IMPLICIT_DEF: {
1568 unsigned Reg =
MI.getOperand(0).getReg() - X86::FP0;
1569 LLVM_DEBUG(
dbgs() <<
"Emitting LD_F0 for implicit FP" << Reg <<
'\n');
1575 case TargetOpcode::INLINEASM:
1576 case TargetOpcode::INLINEASM_BR: {
1610 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1611 unsigned NumOps = 0;
1616 i != e &&
MI.getOperand(i).isImm(); i += 1 + NumOps) {
1617 unsigned Flags =
MI.getOperand(i).getImm();
1620 NumOps =
F.getNumOperandRegisters();
1626 unsigned STReg = MO.
getReg() - X86::FP0;
1632 if (
F.hasRegClassConstraint(RCID)) {
1637 switch (
F.getKind()) {
1638 case InlineAsm::Kind::RegUse:
1639 STUses |= (1u << STReg);
1641 case InlineAsm::Kind::RegDef:
1642 case InlineAsm::Kind::RegDefEarlyClobber:
1643 STDefs |= (1u << STReg);
1645 case InlineAsm::Kind::Clobber:
1646 STClobbers |= (1u << STReg);
1654 MI.emitGenericError(
"fixed input regs must be last on the x87 stack");
1659 MI.emitGenericError(
"output regs must be last on the x87 stack");
1665 if (STClobbers && !
isMask_32(STDefs | STClobbers))
1666 MI.emitGenericError(
"clobbers must be last on the x87 stack");
1669 unsigned STPopped = STUses & (STDefs | STClobbers);
1671 MI.emitGenericError(
1672 "implicitly popped regs must be last on the x87 stack");
1675 LLVM_DEBUG(
dbgs() <<
"Asm uses " << NumSTUses <<
" fixed regs, pops "
1676 << NumSTPopped <<
", and defines " << NumSTDefs
1682 for (
unsigned I = 0, E =
MI.getNumOperands();
I < E; ++
I)
1685 "Operands with constraint \"f\" cannot overlap with defs");
1691 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1693 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1700 if (
Op.isUse() &&
Op.isKill())
1701 FPKills |= 1U <<
FPReg;
1705 FPKills &= ~(STDefs | STClobbers);
1708 unsigned char STUsesArray[8];
1710 for (
unsigned I = 0;
I < NumSTUses; ++
I)
1713 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1715 dbgs() <<
"Before asm: ";
1720 for (
unsigned i = 0, e =
MI.getNumOperands(); i !=
e; ++i) {
1722 if (!
Op.isReg() ||
Op.getReg() < X86::FP0 ||
Op.getReg() > X86::FP6)
1727 if (FRegIdx.
count(i))
1736 StackTop -= NumSTPopped;
1738 for (
unsigned i = 0; i < NumSTDefs; ++i)
1739 pushReg(NumSTDefs - i - 1);
1751 freeStackSlotAfter(Inst,
FPReg);
1752 FPKills &= ~(1U <<
FPReg);
1763 case TargetOpcode::FAKE_USE: {
1764 assert(
MI.getNumExplicitOperands() == 1 &&
1765 "FAKE_USE must have exactly one operand");
1766 if (
MI.getOperand(0).isKill()) {
1767 freeStackSlotBefore(Inst,
getFPReg(
MI.getOperand(0)));
1769 MI.removeOperand(0);
1790 LPR.addLiveOuts(
MBB);
1793 if (
MI.isDebugInstr())
1796 std::bitset<8> Defs;
1799 for (
auto &MO :
MI.operands()) {
1810 if (LPR.available(MO.
getReg()))
1813 Uses.push_back(&MO);
1816 for (
auto *MO :
Uses)
1820 LPR.stepBackward(
MI);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ATTRIBUTE_UNUSED
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const HexagonInstrInfo * TII
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static constexpr MCPhysReg FPReg
Remove Loads Into Fake Uses
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI livein_iterator livein_begin() const
LLVM_ABI void dump() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)