1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
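// For example (illustrative CFG, not taken from any particular test): in a
// diamond
//
//        A
//       / \
//      B   C
//       \ /
//        D
//
// the edges A->B and A->C form one bundle because they leave the same block,
// and B->D and C->D form another because they enter the same block. Every
// edge in a bundle must agree on which FP registers are live and in which
// stack slots they sit. A critical edge belongs to both its source's out-edge
// bundle and its destination's in-edge bundle, which is how bundles grow
// beyond a single block.
//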
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
70 AU.addRequired<EdgeBundlesWrapperLegacy>();
71 AU.addPreservedID(MachineLoopInfoID);
72 AU.addPreservedID(MachineDominatorsID);
73 MachineFunctionPass::getAnalysisUsage(AU);
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
78 MachineFunctionProperties getRequiredProperties() const override {
79 return MachineFunctionProperties().setNoVRegs();
80 }
81
82 StringRef getPassName() const override { return "X86 FP Stackifier"; }
83
84 private:
85 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
86
87 // Two CFG edges are related if they leave the same block, or enter the same
88 // block. The transitive closure of an edge under this relation is a
89 // LiveBundle. It represents a set of CFG edges where the live FP stack
90 // registers must be allocated identically in the x87 stack.
91 //
92 // A LiveBundle is usually all the edges leaving a block, or all the edges
93 // entering a block, but it can contain more edges if critical edges are
94 // present.
95 //
96 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
97 // but the exact mapping of FP registers to stack slots is fixed later.
98 struct LiveBundle {
99 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
100 unsigned Mask = 0;
101
102 // Number of pre-assigned live registers in FixStack. This is 0 when the
103 // stack order has not yet been fixed.
104 unsigned FixCount = 0;
105
106 // Assigned stack order for live-in registers.
107 // FixStack[i] == getStackEntry(i) for all i < FixCount.
108 unsigned char FixStack[8];
109
110 LiveBundle() = default;
111
112 // Have the live registers been assigned a stack order yet?
113 bool isFixed() const { return !Mask || FixCount; }
114 };
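    // For example (illustrative values): Mask == 0x5 means FP0 and FP2 are
    // live across every edge in the bundle. Once the first block using the
    // bundle has been processed, FixCount == 2 and FixStack might be {2, 0},
    // i.e. FP2 is expected in ST(0) and FP0 in ST(1) on all of these edges.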
115
116 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
117 // with no live FP registers.
118 SmallVector<LiveBundle, 8> LiveBundles;
119
120 // The edge bundle analysis provides indices into the LiveBundles vector.
121 EdgeBundles *Bundles = nullptr;
122
123 // Return a bitmask of FP registers in block's live-in list.
124 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
125 unsigned Mask = 0;
126 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin();
127 I != MBB->livein_end(); ) {
128 MCPhysReg Reg = I->PhysReg;
129 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
130 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
131 Mask |= 1 << (Reg - X86::FP0);
132 if (RemoveFPs) {
133 I = MBB->removeLiveIn(I);
134 continue;
135 }
136 }
137 ++I;
138 }
139 return Mask;
140 }
141
142 // Partition all the CFG edges into LiveBundles.
143 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
144
145 MachineBasicBlock *MBB = nullptr; // Current basic block
146
147 // The hardware keeps track of how many FP registers are live, so we have
148 // to model that exactly. Usually, each live register corresponds to an
149 // FP<n> register, but when dealing with calls, returns, and inline
150 // assembly, it is sometimes necessary to have live scratch registers.
151 unsigned Stack[8]; // FP<n> Registers in each stack slot...
152 unsigned StackTop = 0; // The current top of the FP stack.
153
154 enum {
155 NumFPRegs = 8 // Including scratch pseudo-registers.
156 };
157
158 // For each live FP<n> register, point to its Stack[] entry.
159 // The first entries correspond to FP0-FP6, the rest are scratch registers
160 // used when we need slightly different live registers than what the
161 // register allocator thinks.
162 unsigned RegMap[NumFPRegs];
163
164 // Set up our stack model to match the incoming registers to MBB.
165 void setupBlockStack();
166
167 // Shuffle live registers to match the expectations of successor blocks.
168 void finishBlockStack();
169
170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
171 void dumpStack() const {
172 dbgs() << "Stack contents:";
173 for (unsigned i = 0; i != StackTop; ++i) {
174 dbgs() << " FP" << Stack[i];
175 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
176 }
177 }
178#endif
179
180 /// getSlot - Return the stack slot number a particular register number is
181 /// in.
182 unsigned getSlot(unsigned RegNo) const {
183 assert(RegNo < NumFPRegs && "Regno out of range!");
184 return RegMap[RegNo];
185 }
186
187 /// isLive - Is RegNo currently live in the stack?
188 bool isLive(unsigned RegNo) const {
189 unsigned Slot = getSlot(RegNo);
190 return Slot < StackTop && Stack[Slot] == RegNo;
191 }
192
193 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
194 unsigned getStackEntry(unsigned STi) const {
195 if (STi >= StackTop)
196 report_fatal_error("Access past stack top!");
197 return Stack[StackTop-1-STi];
198 }
199
200 /// getSTReg - Return the X86::ST(i) register which contains the specified
201 /// FP<RegNo> register.
202 unsigned getSTReg(unsigned RegNo) const {
203 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
204 }
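    // For example (illustrative values): with StackTop == 2, Stack[0] == 5
    // and Stack[1] == 3, FP3 was pushed last, so getSlot(3) == 1,
    // getStackEntry(0) == 3 and getSTReg(3) == X86::ST0, while FP5 sits
    // below it with getSTReg(5) == X86::ST1.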
205
206 // pushReg - Push the specified FP<n> register onto the stack.
207 void pushReg(unsigned Reg) {
208 assert(Reg < NumFPRegs && "Register number out of range!");
209 if (StackTop >= 8)
210 report_fatal_error("Stack overflow!");
211 Stack[StackTop] = Reg;
212 RegMap[Reg] = StackTop++;
213 }
214
215 // popReg - Pop a register from the stack.
216 void popReg() {
217 if (StackTop == 0)
218 report_fatal_error("Cannot pop empty stack!");
219 RegMap[Stack[--StackTop]] = ~0; // Update state
220 }
221
222 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
223 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
224 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
225 if (isAtTop(RegNo)) return;
226
227 unsigned STReg = getSTReg(RegNo);
228 unsigned RegOnTop = getStackEntry(0);
229
230 // Swap the slots the regs are in.
231 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
232
233 // Swap stack slot contents.
234 if (RegMap[RegOnTop] >= StackTop)
235 report_fatal_error("Access past stack top!");
236 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
237
238 // Emit an fxch to update the runtime processor's version of the state.
239 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
240 ++NumFXCH;
241 }
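    // For example, if ST(0) holds FP3 and ST(1) holds FP5, moveToTop(5, I)
    // emits a single "fxch %st(1)" and updates the model to ST(0)=FP5,
    // ST(1)=FP3.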
242
243 void duplicateToTop(unsigned RegNo, unsigned AsReg,
244 MachineBasicBlock::iterator I) {
245 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
246 unsigned STReg = getSTReg(RegNo);
247 pushReg(AsReg); // New register on top of stack
248
249 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
250 }
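    // For example, duplicateToTop(5, ScratchFPReg, I) with FP5 in ST(2)
    // emits "fld %st(2)"; the pushed copy is tracked as the scratch register
    // from then on.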
251
252 /// popStackAfter - Pop the current value off of the top of the FP stack
253 /// after the specified instruction.
254 void popStackAfter(MachineBasicBlock::iterator &I);
255
256 /// freeStackSlotAfter - Free the specified register from the register
257 /// stack, so that it is no longer in a register. If the register is
258 /// currently at the top of the stack, we just pop the current instruction,
259 /// otherwise we store the current top-of-stack into the specified slot,
260 /// then pop the top of stack.
261 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
262
263 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
264 /// instruction.
265 MachineBasicBlock::iterator
266 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
267
268 /// Adjust the live registers to be the set in Mask.
269 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
270
271 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
272 /// st(0), FP reg FixStack[1] is st(1) etc.
273 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
274 MachineBasicBlock::iterator I);
275
276 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
277
278 void handleCall(MachineBasicBlock::iterator &I);
279 void handleReturn(MachineBasicBlock::iterator &I);
280 void handleZeroArgFP(MachineBasicBlock::iterator &I);
281 void handleOneArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
283 void handleTwoArgFP(MachineBasicBlock::iterator &I);
284 void handleCompareFP(MachineBasicBlock::iterator &I);
285 void handleCondMovFP(MachineBasicBlock::iterator &I);
286 void handleSpecialFP(MachineBasicBlock::iterator &I);
287
288 // Check if a COPY instruction is using FP registers.
289 static bool isFPCopy(MachineInstr &MI) {
290 Register DstReg = MI.getOperand(0).getReg();
291 Register SrcReg = MI.getOperand(1).getReg();
292
293 return X86::RFP80RegClass.contains(DstReg) ||
294 X86::RFP80RegClass.contains(SrcReg);
295 }
296
297 void setKillFlags(MachineBasicBlock &MBB) const;
298 };
299}
300
301char FPS::ID = 0;
302
303INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
304 false, false)
305INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy)
306INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
307 false, false)
308
309FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
310
311/// getFPReg - Return the X86::FPx register number for the specified operand.
312/// For example, this returns 3 for X86::FP3.
313static unsigned getFPReg(const MachineOperand &MO) {
314 assert(MO.isReg() && "Expected an FP register!");
315 Register Reg = MO.getReg();
316 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
317 return Reg - X86::FP0;
318}
319
320/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
321/// register references into FP stack references.
322///
323bool FPS::runOnMachineFunction(MachineFunction &MF) {
324 // We only need to run this pass if there are any FP registers used in this
325 // function. If it is all integer, there is nothing for us to do!
326 bool FPIsUsed = false;
327
328 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
329 const MachineRegisterInfo &MRI = MF.getRegInfo();
330 for (unsigned i = 0; i <= 6; ++i)
331 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
332 FPIsUsed = true;
333 break;
334 }
335
336 // Early exit.
337 if (!FPIsUsed) return false;
338
339 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
340 TII = MF.getSubtarget().getInstrInfo();
341
342 // Prepare cross-MBB liveness.
343 bundleCFGRecomputeKillFlags(MF);
344
345 StackTop = 0;
346
347 // Process the function in depth first order so that we process at least one
348 // of the predecessors for every reachable block in the function.
349 df_iterator_default_set<MachineBasicBlock *> Processed;
350 MachineBasicBlock *Entry = &MF.front();
351
352 LiveBundle &Bundle =
353 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
354
355 // In regcall convention, some FP registers may not be passed through
356 // the stack, so they will need to be assigned to the stack first
357 if ((Entry->getParent()->getFunction().getCallingConv() ==
358 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
359 // In the register calling convention, up to one FP argument could be
360 // saved in the first FP register.
361 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
362 // that the FP registers contain arguments.
363 // The actual value is passed in FP0.
364 // Here we fix the stack and mark FP0 as pre-assigned register.
365 assert((Bundle.Mask & 0xFE) == 0 &&
366 "Only FP0 could be passed as an argument");
367 Bundle.FixCount = 1;
368 Bundle.FixStack[0] = 0;
369 }
370
371 bool Changed = false;
372 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
373 Changed |= processBasicBlock(MF, *BB);
374
375 // Process any unreachable blocks in arbitrary order now.
376 if (MF.size() != Processed.size())
377 for (MachineBasicBlock &BB : MF)
378 if (Processed.insert(&BB).second)
379 Changed |= processBasicBlock(MF, BB);
380
381 LiveBundles.clear();
382
383 return Changed;
384}
385
386/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
387/// live-out sets for the FP registers. Consistent means that the set of
388/// registers live-out from a block is identical to the live-in set of all
389/// successors. This is not enforced by the normal live-in lists since
390/// registers may be implicitly defined, or not used by all successors.
391void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
392 assert(LiveBundles.empty() && "Stale data in LiveBundles");
393 LiveBundles.resize(Bundles->getNumBundles());
394
395 // Gather the actual live-in masks for all MBBs.
396 for (MachineBasicBlock &MBB : MF) {
397 setKillFlags(MBB);
398
399 const unsigned Mask = calcLiveInMask(&MBB, false);
400 if (!Mask)
401 continue;
402 // Update MBB ingoing bundle mask.
403 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
404 }
405}
406
407/// processBasicBlock - Loop over all of the instructions in the basic block,
408/// transforming FP instructions into their stack form.
409///
410bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
411 bool Changed = false;
412 MBB = &BB;
413
414 setupBlockStack();
415
416 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
417 MachineInstr &MI = *I;
418 uint64_t Flags = MI.getDesc().TSFlags;
419
420 unsigned FPInstClass = Flags & X86II::FPTypeMask;
421 if (MI.isInlineAsm())
422 FPInstClass = X86II::SpecialFP;
423
424 if (MI.isCopy() && isFPCopy(MI))
425 FPInstClass = X86II::SpecialFP;
426
427 if (MI.isImplicitDef() &&
428 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
429 FPInstClass = X86II::SpecialFP;
430
431 if (MI.isCall())
432 FPInstClass = X86II::SpecialFP;
433
434 // A fake_use with a floating point pseudo register argument that is
435 // killed must behave like any other floating point operation and pop
436 // the floating point stack (this is done in handleSpecialFP()).
437 // Fake_use is, however, unusual, in that sometimes its operand is not
438 // killed because a later instruction (probably a return) will use it.
439 // It is this instruction that will pop the stack.
440 // In this scenario we can safely remove the fake_use's operand
441 // (it is live anyway).
442 if (MI.isFakeUse()) {
443 const MachineOperand &MO = MI.getOperand(0);
444 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
445 if (MO.isKill())
446 FPInstClass = X86II::SpecialFP;
447 else
448 MI.removeOperand(0);
449 }
450 }
451
452 if (FPInstClass == X86II::NotFP)
453 continue; // Efficiently ignore non-fp insts!
454
455 MachineInstr *PrevMI = nullptr;
456 if (I != BB.begin())
457 PrevMI = &*std::prev(I);
458
459 ++NumFP; // Keep track of # of pseudo instrs
460 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
461
462 // Get dead variables list now because the MI pointer may be deleted as part
463 // of processing!
464 SmallVector<Register, 8> DeadRegs;
465 for (const MachineOperand &MO : MI.operands())
466 if (MO.isReg() && MO.isDead())
467 DeadRegs.push_back(MO.getReg());
468
469 switch (FPInstClass) {
470 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
471 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
472 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
473 case X86II::TwoArgFP: handleTwoArgFP(I); break;
474 case X86II::CompareFP: handleCompareFP(I); break;
475 case X86II::CondMovFP: handleCondMovFP(I); break;
476 case X86II::SpecialFP: handleSpecialFP(I); break;
477 default: llvm_unreachable("Unknown FP Type!");
478 }
479
480 // Check to see if any of the values defined by this instruction are dead
481 // after definition. If so, pop them.
482 for (Register Reg : DeadRegs) {
483 // Check if Reg is live on the stack. An inline-asm register operand that
484 // is in the clobber list and marked dead might not be live on the stack.
485 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
486 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
487 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
488 freeStackSlotAfter(I, Reg-X86::FP0);
489 }
490 }
491
492 // Print out all of the instructions expanded to if -debug
493 LLVM_DEBUG({
494 MachineBasicBlock::iterator PrevI = PrevMI;
495 if (I == PrevI) {
496 dbgs() << "Just deleted pseudo instruction\n";
497 } else {
498 MachineBasicBlock::iterator Start = I;
499 // Rewind to first instruction newly inserted.
500 while (Start != BB.begin() && std::prev(Start) != PrevI)
501 --Start;
502 dbgs() << "Inserted instructions:\n\t";
503 Start->print(dbgs());
504 while (++Start != std::next(I)) {
505 }
506 }
507 dumpStack();
508 });
509 (void)PrevMI;
510
511 Changed = true;
512 }
513
514 finishBlockStack();
515
516 return Changed;
517}
518
519/// setupBlockStack - Use the live bundles to set up our model of the stack
520/// to match predecessors' live out stack.
521void FPS::setupBlockStack() {
522 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
523 << " derived from " << MBB->getName() << ".\n");
524 StackTop = 0;
525 // Get the live-in bundle for MBB.
526 const LiveBundle &Bundle =
527 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
528
529 if (!Bundle.Mask) {
530 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
531 return;
532 }
533
534 // Depth-first iteration should ensure that we always have an assigned stack.
535 assert(Bundle.isFixed() && "Reached block before any predecessors");
536
537 // Push the fixed live-in registers.
538 for (unsigned i = Bundle.FixCount; i > 0; --i) {
539 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
540 << unsigned(Bundle.FixStack[i - 1]) << '\n');
541 pushReg(Bundle.FixStack[i-1]);
542 }
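  // For example, with FixCount == 2 and FixStack == {3, 5}, FP5 is pushed
  // first and FP3 last, so FP3 ends up in ST(0) and FP5 in ST(1), matching
  // FixStack[i] == getStackEntry(i).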
543
544 // Kill off unwanted live-ins. This can happen with a critical edge.
545 // FIXME: We could keep these live registers around as zombies. They may need
546 // to be revived at the end of a short block. It might save a few instrs.
547 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
548 adjustLiveRegs(Mask, MBB->begin());
549 LLVM_DEBUG(MBB->dump());
550}
551
552/// finishBlockStack - Revive live-outs that are implicitly defined out of
553/// MBB. Shuffle live registers to match the expected fixed stack of any
554/// predecessors, and ensure that all predecessors are expecting the same
555/// stack.
556void FPS::finishBlockStack() {
557 // The RET handling below takes care of return blocks for us.
558 if (MBB->succ_empty())
559 return;
560
561 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
562 << " derived from " << MBB->getName() << ".\n");
563
564 // Get MBB's live-out bundle.
565 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
566 LiveBundle &Bundle = LiveBundles[BundleIdx];
567
568 // We may need to kill and define some registers to match successors.
569 // FIXME: This can probably be combined with the shuffle below.
570 MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
571 adjustLiveRegs(Bundle.Mask, Term);
572
573 if (!Bundle.Mask) {
574 LLVM_DEBUG(dbgs() << "No live-outs.\n");
575 return;
576 }
577
578 // Has the stack order been fixed yet?
579 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
580 if (Bundle.isFixed()) {
581 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
582 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
583 } else {
584 // Not fixed yet, we get to choose.
585 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
586 Bundle.FixCount = StackTop;
587 for (unsigned i = 0; i < StackTop; ++i)
588 Bundle.FixStack[i] = getStackEntry(i);
589 }
590}
591
592
593//===----------------------------------------------------------------------===//
594// Efficient Lookup Table Support
595//===----------------------------------------------------------------------===//
596
597namespace {
598 struct TableEntry {
599 uint16_t from;
600 uint16_t to;
601 bool operator<(const TableEntry &TE) const { return from < TE.from; }
602 friend bool operator<(const TableEntry &TE, unsigned V) {
603 return TE.from < V;
604 }
605 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
606 const TableEntry &TE) {
607 return V < TE.from;
608 }
609 };
610}
611
612static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
613 const TableEntry *I = llvm::lower_bound(Table, Opcode);
614 if (I != Table.end() && I->from == Opcode)
615 return I->to;
616 return -1;
617}
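// For example, Lookup(OpcodeTable, X86::ABS_Fp64) binary-searches the sorted
// table defined below and returns X86::ABS_F; an opcode without an entry
// yields -1.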
618
619#ifdef NDEBUG
620#define ASSERT_SORTED(TABLE)
621#else
622#define ASSERT_SORTED(TABLE) \
623 { \
624 static std::atomic<bool> TABLE##Checked(false); \
625 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
626 assert(is_sorted(TABLE) && \
627 "All lookup tables must be sorted for efficient access!"); \
628 TABLE##Checked.store(true, std::memory_order_relaxed); \
629 } \
630 }
631#endif
632
633//===----------------------------------------------------------------------===//
634// Register File -> Register Stack Mapping Methods
635//===----------------------------------------------------------------------===//
636
637// OpcodeTable - Sorted map of register instructions to their stack version.
638// The first element is a register file pseudo instruction, the second is the
639// concrete X86 instruction which uses the register stack.
640//
641static const TableEntry OpcodeTable[] = {
642 { X86::ABS_Fp32 , X86::ABS_F },
643 { X86::ABS_Fp64 , X86::ABS_F },
644 { X86::ABS_Fp80 , X86::ABS_F },
645 { X86::ADD_Fp32m , X86::ADD_F32m },
646 { X86::ADD_Fp64m , X86::ADD_F64m },
647 { X86::ADD_Fp64m32 , X86::ADD_F32m },
648 { X86::ADD_Fp80m32 , X86::ADD_F32m },
649 { X86::ADD_Fp80m64 , X86::ADD_F64m },
650 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
651 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
652 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
653 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
654 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
655 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
656 { X86::CHS_Fp32 , X86::CHS_F },
657 { X86::CHS_Fp64 , X86::CHS_F },
658 { X86::CHS_Fp80 , X86::CHS_F },
659 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
660 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
661 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
662 { X86::CMOVB_Fp32 , X86::CMOVB_F },
663 { X86::CMOVB_Fp64 , X86::CMOVB_F },
664 { X86::CMOVB_Fp80 , X86::CMOVB_F },
665 { X86::CMOVE_Fp32 , X86::CMOVE_F },
666 { X86::CMOVE_Fp64 , X86::CMOVE_F },
667 { X86::CMOVE_Fp80 , X86::CMOVE_F },
668 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
669 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
670 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
671 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
672 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
673 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
674 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
675 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
676 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
677 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
678 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
679 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
680 { X86::CMOVP_Fp32 , X86::CMOVP_F },
681 { X86::CMOVP_Fp64 , X86::CMOVP_F },
682 { X86::CMOVP_Fp80 , X86::CMOVP_F },
683 { X86::COM_FpIr32 , X86::COM_FIr },
684 { X86::COM_FpIr64 , X86::COM_FIr },
685 { X86::COM_FpIr80 , X86::COM_FIr },
686 { X86::COM_Fpr32 , X86::COM_FST0r },
687 { X86::COM_Fpr64 , X86::COM_FST0r },
688 { X86::COM_Fpr80 , X86::COM_FST0r },
689 { X86::DIVR_Fp32m , X86::DIVR_F32m },
690 { X86::DIVR_Fp64m , X86::DIVR_F64m },
691 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
692 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
693 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
694 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
695 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
696 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
697 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
698 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
699 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
700 { X86::DIV_Fp32m , X86::DIV_F32m },
701 { X86::DIV_Fp64m , X86::DIV_F64m },
702 { X86::DIV_Fp64m32 , X86::DIV_F32m },
703 { X86::DIV_Fp80m32 , X86::DIV_F32m },
704 { X86::DIV_Fp80m64 , X86::DIV_F64m },
705 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
706 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
707 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
708 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
709 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
710 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
711 { X86::ILD_Fp16m32 , X86::ILD_F16m },
712 { X86::ILD_Fp16m64 , X86::ILD_F16m },
713 { X86::ILD_Fp16m80 , X86::ILD_F16m },
714 { X86::ILD_Fp32m32 , X86::ILD_F32m },
715 { X86::ILD_Fp32m64 , X86::ILD_F32m },
716 { X86::ILD_Fp32m80 , X86::ILD_F32m },
717 { X86::ILD_Fp64m32 , X86::ILD_F64m },
718 { X86::ILD_Fp64m64 , X86::ILD_F64m },
719 { X86::ILD_Fp64m80 , X86::ILD_F64m },
720 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
721 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
722 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
723 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
724 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
725 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
726 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
727 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
728 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
729 { X86::IST_Fp16m32 , X86::IST_F16m },
730 { X86::IST_Fp16m64 , X86::IST_F16m },
731 { X86::IST_Fp16m80 , X86::IST_F16m },
732 { X86::IST_Fp32m32 , X86::IST_F32m },
733 { X86::IST_Fp32m64 , X86::IST_F32m },
734 { X86::IST_Fp32m80 , X86::IST_F32m },
735 { X86::IST_Fp64m32 , X86::IST_FP64m },
736 { X86::IST_Fp64m64 , X86::IST_FP64m },
737 { X86::IST_Fp64m80 , X86::IST_FP64m },
738 { X86::LD_Fp032 , X86::LD_F0 },
739 { X86::LD_Fp064 , X86::LD_F0 },
740 { X86::LD_Fp080 , X86::LD_F0 },
741 { X86::LD_Fp132 , X86::LD_F1 },
742 { X86::LD_Fp164 , X86::LD_F1 },
743 { X86::LD_Fp180 , X86::LD_F1 },
744 { X86::LD_Fp32m , X86::LD_F32m },
745 { X86::LD_Fp32m64 , X86::LD_F32m },
746 { X86::LD_Fp32m80 , X86::LD_F32m },
747 { X86::LD_Fp64m , X86::LD_F64m },
748 { X86::LD_Fp64m80 , X86::LD_F64m },
749 { X86::LD_Fp80m , X86::LD_F80m },
750 { X86::MUL_Fp32m , X86::MUL_F32m },
751 { X86::MUL_Fp64m , X86::MUL_F64m },
752 { X86::MUL_Fp64m32 , X86::MUL_F32m },
753 { X86::MUL_Fp80m32 , X86::MUL_F32m },
754 { X86::MUL_Fp80m64 , X86::MUL_F64m },
755 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
756 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
757 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
758 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
759 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
760 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
761 { X86::SQRT_Fp32 , X86::SQRT_F },
762 { X86::SQRT_Fp64 , X86::SQRT_F },
763 { X86::SQRT_Fp80 , X86::SQRT_F },
764 { X86::ST_Fp32m , X86::ST_F32m },
765 { X86::ST_Fp64m , X86::ST_F64m },
766 { X86::ST_Fp64m32 , X86::ST_F32m },
767 { X86::ST_Fp80m32 , X86::ST_F32m },
768 { X86::ST_Fp80m64 , X86::ST_F64m },
769 { X86::ST_FpP80m , X86::ST_FP80m },
770 { X86::SUBR_Fp32m , X86::SUBR_F32m },
771 { X86::SUBR_Fp64m , X86::SUBR_F64m },
772 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
773 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
774 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
775 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
776 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
777 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
778 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
779 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
780 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
781 { X86::SUB_Fp32m , X86::SUB_F32m },
782 { X86::SUB_Fp64m , X86::SUB_F64m },
783 { X86::SUB_Fp64m32 , X86::SUB_F32m },
784 { X86::SUB_Fp80m32 , X86::SUB_F32m },
785 { X86::SUB_Fp80m64 , X86::SUB_F64m },
786 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
787 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
788 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
789 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
790 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
791 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
792 { X86::TST_Fp32 , X86::TST_F },
793 { X86::TST_Fp64 , X86::TST_F },
794 { X86::TST_Fp80 , X86::TST_F },
795 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
796 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
797 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
798 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
799 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
800 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
801 { X86::XAM_Fp32 , X86::XAM_F },
802 { X86::XAM_Fp64 , X86::XAM_F },
803 { X86::XAM_Fp80 , X86::XAM_F },
804};
805
806static unsigned getConcreteOpcode(unsigned Opcode) {
807 ASSERT_SORTED(OpcodeTable);
808 int Opc = Lookup(OpcodeTable, Opcode);
809 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
810 return Opc;
811}
812
813//===----------------------------------------------------------------------===//
814// Helper Methods
815//===----------------------------------------------------------------------===//
816
817// PopTable - Sorted map of instructions to their popping version. The first
818// element is an instruction, the second is the version which pops.
819//
820static const TableEntry PopTable[] = {
821 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
822
823 { X86::COMP_FST0r, X86::FCOMPP },
824 { X86::COM_FIr , X86::COM_FIPr },
825 { X86::COM_FST0r , X86::COMP_FST0r },
826
827 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
828 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
829
830 { X86::IST_F16m , X86::IST_FP16m },
831 { X86::IST_F32m , X86::IST_FP32m },
832
833 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
834
835 { X86::ST_F32m , X86::ST_FP32m },
836 { X86::ST_F64m , X86::ST_FP64m },
837 { X86::ST_Frr , X86::ST_FPrr },
838
839 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
840 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
841
842 { X86::UCOM_FIr , X86::UCOM_FIPr },
843
844 { X86::UCOM_FPr , X86::UCOM_FPPr },
845 { X86::UCOM_Fr , X86::UCOM_FPr },
846};
847
848static bool doesInstructionSetFPSW(MachineInstr &MI) {
849 if (const MachineOperand *MO =
850 MI.findRegisterDefOperand(X86::FPSW, /*TRI=*/nullptr))
851 if (!MO->isDead())
852 return true;
853 return false;
854}
855
856static MachineBasicBlock::iterator
857getNextFPInstruction(MachineBasicBlock::iterator I) {
858 MachineBasicBlock &MBB = *I->getParent();
859 while (++I != MBB.end()) {
860 MachineInstr &MI = *I;
861 if (X86::isX87Instruction(MI))
862 return I;
863 }
864 return MBB.end();
865}
866
867/// popStackAfter - Pop the current value off of the top of the FP stack after
868/// the specified instruction. This attempts to be sneaky and combine the pop
869/// into the instruction itself if possible. The iterator is left pointing to
870/// the last instruction, be it a new pop instruction inserted, or the old
871/// instruction if it was modified in place.
872///
873void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
874 MachineInstr &MI = *I;
875 const DebugLoc &dl = MI.getDebugLoc();
876 ASSERT_SORTED(PopTable);
877
878 popReg();
879
880 // Check to see if there is a popping version of this instruction...
881 int Opcode = Lookup(PopTable, I->getOpcode());
882 if (Opcode != -1) {
883 I->setDesc(TII->get(Opcode));
884 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
885 I->removeOperand(0);
886 MI.dropDebugNumber();
887 } else { // Insert an explicit pop
888 // If this instruction sets FPSW, which is read by the following
889 // instruction, insert the pop after that reader.
890 if (doesInstructionSetFPSW(MI)) {
891 MachineBasicBlock &MBB = *MI.getParent();
892 MachineBasicBlock::iterator Next = getNextFPInstruction(I);
893 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, /*TRI=*/nullptr))
894 I = Next;
895 }
896 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
897 }
898}
899
900/// freeStackSlotAfter - Free the specified register from the register stack, so
901/// that it is no longer in a register. If the register is currently at the top
902/// of the stack, we just pop the current instruction, otherwise we store the
903/// current top-of-stack into the specified slot, then pop the top of stack.
904void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
905 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
906 popStackAfter(I);
907 return;
908 }
909
910 // Otherwise, store the top of stack into the dead slot, killing the operand
911 // without having to add in an explicit xchg then pop.
912 //
913 I = freeStackSlotBefore(++I, FPRegNo);
914}
915
916/// freeStackSlotBefore - Free the specified register without trying any
917/// folding.
919FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
920 unsigned STReg = getSTReg(FPRegNo);
921 unsigned OldSlot = getSlot(FPRegNo);
922 unsigned TopReg = Stack[StackTop-1];
923 Stack[OldSlot] = TopReg;
924 RegMap[TopReg] = OldSlot;
925 RegMap[FPRegNo] = ~0;
926 Stack[--StackTop] = ~0;
927 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
928 .addReg(STReg)
929 .getInstr();
930}
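// For example (illustrative registers), freeing FP5 from ST(2) while FP3 is
// on top emits "fstp %st(2)": the hardware stores ST(0) over the dead slot
// and pops, so FP3 takes over FP5's stack slot and the stack shrinks by one.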
931
932/// adjustLiveRegs - Kill and revive registers such that exactly the FP
933/// registers with a bit in Mask are live.
934void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
935 unsigned Defs = Mask;
936 unsigned Kills = 0;
937 for (unsigned i = 0; i < StackTop; ++i) {
938 unsigned RegNo = Stack[i];
939 if (!(Defs & (1 << RegNo)))
940 // This register is live, but we don't want it.
941 Kills |= (1 << RegNo);
942 else
943 // We don't need to imp-def this live register.
944 Defs &= ~(1 << RegNo);
945 }
946 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
947
948 // Produce implicit-defs for free by using killed registers.
949 while (Kills && Defs) {
950 unsigned KReg = llvm::countr_zero(Kills);
951 unsigned DReg = llvm::countr_zero(Defs);
952 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
953 << "\n");
954 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
955 std::swap(RegMap[KReg], RegMap[DReg]);
956 Kills &= ~(1 << KReg);
957 Defs &= ~(1 << DReg);
958 }
959
960 // Kill registers by popping.
961 if (Kills && I != MBB->begin()) {
962 MachineBasicBlock::iterator I2 = std::prev(I);
963 while (StackTop) {
964 unsigned KReg = getStackEntry(0);
965 if (!(Kills & (1 << KReg)))
966 break;
967 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
968 popStackAfter(I2);
969 Kills &= ~(1 << KReg);
970 }
971 }
972
973 // Manually kill the rest.
974 while (Kills) {
975 unsigned KReg = llvm::countr_zero(Kills);
976 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
977 freeStackSlotBefore(I, KReg);
978 Kills &= ~(1 << KReg);
979 }
980
981 // Load zeros for all the imp-defs.
982 while(Defs) {
983 unsigned DReg = llvm::countr_zero(Defs);
984 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
985 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
986 pushReg(DReg);
987 Defs &= ~(1 << DReg);
988 }
989
990 // Now we should have the correct registers live.
991 LLVM_DEBUG(dumpStack());
992 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
993}
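// For example, if FP1 is live in the model but Mask only asks for FP2, the
// renaming loop above simply relabels FP1's slot as FP2 (an implicit-def for
// free) instead of popping FP1 and then loading a zero for FP2.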
994
995/// shuffleStackTop - emit fxch instructions before I to shuffle the top
996/// FixCount entries into the order given by FixStack.
997/// FIXME: Is there a better algorithm than insertion sort?
998void FPS::shuffleStackTop(const unsigned char *FixStack,
999 unsigned FixCount,
1000 MachineBasicBlock::iterator I) {
1001 // Move items into place, starting from the desired stack bottom.
1002 while (FixCount--) {
1003 // Old register at position FixCount.
1004 unsigned OldReg = getStackEntry(FixCount);
1005 // Desired register at position FixCount.
1006 unsigned Reg = FixStack[FixCount];
1007 if (Reg == OldReg)
1008 continue;
1009 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
1010 moveToTop(Reg, I);
1011 if (FixCount > 0)
1012 moveToTop(OldReg, I);
1013 }
1014 LLVM_DEBUG(dumpStack());
1015}
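// For example, turning ST(0)=FP0, ST(1)=FP1 into FixStack == {1, 0} (FP1 in
// ST(0), FP0 in ST(1)) costs a single fxch: the first iteration leaves FP0 on
// top and swaps FP1 up, and the second finds FP1 already in place.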
1016
1017
1018//===----------------------------------------------------------------------===//
1019// Instruction transformation implementation
1020//===----------------------------------------------------------------------===//
1021
1022void FPS::handleCall(MachineBasicBlock::iterator &I) {
1023 MachineInstr &MI = *I;
1024 unsigned STReturns = 0;
1025
1026 bool ClobbersFPStack = false;
1027 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1028 MachineOperand &Op = MI.getOperand(i);
1029 // Check if this call clobbers the FP stack. Calls clobber either all of
1030 // the FP registers or none of them, so checking FP0 in the regmask is sufficient.
1031 if (Op.isRegMask()) {
1032 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1033#ifndef NDEBUG
1034 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1035 for (unsigned i = 1; i != 8; ++i)
1036 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1037 "Inconsistent FP register clobber");
1038#endif
1039
1040 if (ClobbersFP0)
1041 ClobbersFPStack = true;
1042 }
1043
1044 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1045 continue;
1046
1047 assert(Op.isImplicit() && "Expected implicit def/use");
1048
1049 if (Op.isDef())
1050 STReturns |= 1 << getFPReg(Op);
1051
1052 // Remove the operand so that later passes don't see it.
1053 MI.removeOperand(i);
1054 --i;
1055 --e;
1056 }
1057
1058 // Most calls should have a regmask that clobbers the FP registers. If it
1059 // isn't present then the register allocator didn't spill the FP registers
1060 // so they are still on the stack.
1061 assert((ClobbersFPStack || STReturns == 0) &&
1062 "ST returns without FP stack clobber");
1063 if (!ClobbersFPStack)
1064 return;
1065
1066 unsigned N = llvm::countr_one(STReturns);
1067
1068 // FP registers used for function return must be consecutive starting at
1069 // FP0
1070 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1071
1072 // Reset the FP Stack - It is required because of possible leftovers from
1073 // passed arguments. The caller should assume that the FP stack is
1074 // returned empty (unless the callee returns values on FP stack).
1075 while (StackTop > 0)
1076 popReg();
1077
1078 for (unsigned I = 0; I < N; ++I)
1079 pushReg(N - I - 1);
1080
1081 // If this call has been modified, drop all variable values defined by it.
1082 // We can't track them once they've been stackified.
1083 if (STReturns)
1084 I->dropDebugNumber();
1085}
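// For example, a call whose return value occupies FP0 and FP1 (e.g. a
// _Complex long double return) yields STReturns == 0x3 and N == 2; the loop
// above rebuilds the model as ST(0)=FP0, ST(1)=FP1, mirroring how the
// hardware leaves the return values on the x87 stack.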
1086
1087/// If RET has an FP register use operand, pass the first one in ST(0) and
1088/// the second one in ST(1).
1089void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1090 MachineInstr &MI = *I;
1091
1092 // Find the register operands.
1093 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1094 unsigned LiveMask = 0;
1095
1096 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1097 MachineOperand &Op = MI.getOperand(i);
1098 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1099 continue;
1100 // FP Register uses must be kills unless there are two uses of the same
1101 // register, in which case only one will be a kill.
1102 assert(Op.isUse() &&
1103 (Op.isKill() || // Marked kill.
1104 getFPReg(Op) == FirstFPRegOp || // Second instance.
1105 MI.killsRegister(Op.getReg(),
1106 /*TRI=*/nullptr)) && // Later use is marked kill.
1107 "Ret only defs operands, and values aren't live beyond it");
1108
1109 if (FirstFPRegOp == ~0U)
1110 FirstFPRegOp = getFPReg(Op);
1111 else {
1112 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1113 SecondFPRegOp = getFPReg(Op);
1114 }
1115 LiveMask |= (1 << getFPReg(Op));
1116
1117 // Remove the operand so that later passes don't see it.
1118 MI.removeOperand(i);
1119 --i;
1120 --e;
1121 }
1122
1123 // We may have been carrying spurious live-ins, so make sure only the
1124 // returned registers are left live.
1125 adjustLiveRegs(LiveMask, MI);
1126 if (!LiveMask) return; // Quick check to see if any are possible.
1127
1128 // There are only four possibilities here:
1129 // 1) we are returning a single FP value. In this case, it has to be in
1130 // ST(0) already, so just declare success by removing the value from the
1131 // FP Stack.
1132 if (SecondFPRegOp == ~0U) {
1133 // Assert that the top of stack contains the right FP register.
1134 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1135 "Top of stack not the right register for RET!");
1136
1137 // Ok, everything is good, mark the value as not being on the stack
1138 // anymore so that our assertion about the stack being empty at end of
1139 // block doesn't fire.
1140 StackTop = 0;
1141 return;
1142 }
1143
1144 // Otherwise, we are returning two values:
1145 // 2) If returning the same value for both, we only have one thing in the FP
1146 // stack. Consider: RET FP1, FP1
1147 if (StackTop == 1) {
1148 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1149 "Stack misconfiguration for RET!");
1150
1151 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1152 // register to hold it.
1153 unsigned NewReg = ScratchFPReg;
1154 duplicateToTop(FirstFPRegOp, NewReg, MI);
1155 FirstFPRegOp = NewReg;
1156 }
1157
1158 /// Okay we know we have two different FPx operands now:
1159 assert(StackTop == 2 && "Must have two values live!");
1160
1161 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1162 /// in ST(1). In this case, emit an fxch.
1163 if (getStackEntry(0) == SecondFPRegOp) {
1164 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1165 moveToTop(FirstFPRegOp, MI);
1166 }
1167
1168 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1169 /// ST(1). Just remove both from our understanding of the stack and return.
1170 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1171 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1172 StackTop = 0;
1173}
1174
1175/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1176///
1177void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1178 MachineInstr &MI = *I;
1179 unsigned DestReg = getFPReg(MI.getOperand(0));
1180
1181 // Change from the pseudo instruction to the concrete instruction.
1182 MI.removeOperand(0); // Remove the explicit ST(0) operand
1183 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1184 MI.addOperand(
1185 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1186
1187 // Result gets pushed on the stack.
1188 pushReg(DestReg);
1189
1190 MI.dropDebugNumber();
1191}
1192
1193/// handleOneArgFP - fst <mem>, ST(0)
1194///
1195void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1196 MachineInstr &MI = *I;
1197 unsigned NumOps = MI.getDesc().getNumOperands();
1198 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1199 "Can only handle fst* & ftst instructions!");
1200
1201 // Is this the last use of the source register?
1202 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1203 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1204
1205 // FISTP64m is strange because there isn't a non-popping version.
1206 // If we have one _and_ we don't want to pop the operand, duplicate the value
1207 // on the stack instead of moving it. This ensures that popping the value is
1208 // always ok.
1209 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1210 //
1211 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1212 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1213 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1214 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1215 MI.getOpcode() == X86::IST_Fp64m64 ||
1216 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1217 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1218 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1219 MI.getOpcode() == X86::IST_Fp64m80 ||
1220 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1221 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1222 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1223 MI.getOpcode() == X86::ST_FpP80m)) {
1224 duplicateToTop(Reg, ScratchFPReg, I);
1225 } else {
1226 moveToTop(Reg, I); // Move to the top of the stack...
1227 }
1228
1229 // Convert from the pseudo instruction to the concrete instruction.
1230 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1231 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1232 MI.addOperand(
1233 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1234
1235 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1236 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1237 MI.getOpcode() == X86::ST_FP80m) {
1238 if (StackTop == 0)
1239 report_fatal_error("Stack empty??");
1240 --StackTop;
1241 } else if (KillsSrc) { // Last use of operand?
1242 popStackAfter(I);
1243 }
1244
1245 MI.dropDebugNumber();
1246}
1247
1248
1249/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1250/// replace the value with a newly computed value. These instructions may have
1251/// non-fp operands after their FP operands.
1252///
1253/// Examples:
1254/// R1 = fchs R2
1255/// R1 = fadd R2, [mem]
1256///
1257void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1258 MachineInstr &MI = *I;
1259#ifndef NDEBUG
1260 unsigned NumOps = MI.getDesc().getNumOperands();
1261 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1262#endif
1263
1264 // Is this the last use of the source register?
1265 unsigned Reg = getFPReg(MI.getOperand(1));
1266 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, /*TRI=*/nullptr);
1267
1268 if (KillsSrc) {
1269 // If this is the last use of the source register, just make sure it's on
1270 // the top of the stack.
1271 moveToTop(Reg, I);
1272 if (StackTop == 0)
1273 report_fatal_error("Stack cannot be empty!");
1274 --StackTop;
1275 pushReg(getFPReg(MI.getOperand(0)));
1276 } else {
1277 // If this is not the last use of the source register, _copy_ it to the top
1278 // of the stack.
1279 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1280 }
1281
1282 // Change from the pseudo instruction to the concrete instruction.
1283 MI.removeOperand(1); // Drop the source operand.
1284 MI.removeOperand(0); // Drop the destination operand.
1285 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1286 MI.dropDebugNumber();
1287}
1288
1289
1290//===----------------------------------------------------------------------===//
1291// Define tables of various ways to map pseudo instructions
1292//
1293
1294// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1295static const TableEntry ForwardST0Table[] = {
1296 { X86::ADD_Fp32 , X86::ADD_FST0r },
1297 { X86::ADD_Fp64 , X86::ADD_FST0r },
1298 { X86::ADD_Fp80 , X86::ADD_FST0r },
1299 { X86::DIV_Fp32 , X86::DIV_FST0r },
1300 { X86::DIV_Fp64 , X86::DIV_FST0r },
1301 { X86::DIV_Fp80 , X86::DIV_FST0r },
1302 { X86::MUL_Fp32 , X86::MUL_FST0r },
1303 { X86::MUL_Fp64 , X86::MUL_FST0r },
1304 { X86::MUL_Fp80 , X86::MUL_FST0r },
1305 { X86::SUB_Fp32 , X86::SUB_FST0r },
1306 { X86::SUB_Fp64 , X86::SUB_FST0r },
1307 { X86::SUB_Fp80 , X86::SUB_FST0r },
1308};
1309
1310// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1311static const TableEntry ReverseST0Table[] = {
1312 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1313 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1314 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1315 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1316 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1317 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1318 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1319 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1320 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1321 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1322 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1323 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1324};
1325
1326// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1327static const TableEntry ForwardSTiTable[] = {
1328 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1329 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1330 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1331 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1332 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1333 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1334 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1335 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1336 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1337 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1338 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1339 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1340};
1341
1342// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1343static const TableEntry ReverseSTiTable[] = {
1344 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1345 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1346 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1347 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1348 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1349 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1350 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1351 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1352 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1353 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1354 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1355 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1356};
1357
1358
1359/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1360/// instructions which need to be simplified and possibly transformed.
1361///
1362/// Result: ST(0) = fsub ST(0), ST(i)
1363/// ST(i) = fsub ST(0), ST(i)
1364/// ST(0) = fsubr ST(0), ST(i)
1365/// ST(i) = fsubr ST(0), ST(i)
1366///
1367void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1368 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
1369 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
1370 MachineInstr &MI = *I;
1371
1372 unsigned NumOperands = MI.getDesc().getNumOperands();
1373 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1374 unsigned Dest = getFPReg(MI.getOperand(0));
1375 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1376 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1377 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1378 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1379 const DebugLoc &dl = MI.getDebugLoc();
1380
1381 unsigned TOS = getStackEntry(0);
1382
1383 // One of our operands must be on the top of the stack. If neither is yet, we
1384 // need to move one.
1385 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1386 // We can choose to move either operand to the top of the stack. If one of
1387 // the operands is killed by this instruction, we want that one so that we
1388 // can update right on top of the old version.
1389 if (KillsOp0) {
1390 moveToTop(Op0, I); // Move dead operand to TOS.
1391 TOS = Op0;
1392 } else if (KillsOp1) {
1393 moveToTop(Op1, I);
1394 TOS = Op1;
1395 } else {
1396 // All of the operands are live after this instruction executes, so we
1397 // cannot update on top of any operand. Because of this, we must
1398 // duplicate one of the stack elements to the top. It doesn't matter
1399 // which one we pick.
1400 //
1401 duplicateToTop(Op0, Dest, I);
1402 Op0 = TOS = Dest;
1403 KillsOp0 = true;
1404 }
1405 } else if (!KillsOp0 && !KillsOp1) {
1406 // If we DO have one of our operands at the top of the stack, but we don't
1407 // have a dead operand, we must duplicate one of the operands to a new slot
1408 // on the stack.
1409 duplicateToTop(Op0, Dest, I);
1410 Op0 = TOS = Dest;
1411 KillsOp0 = true;
1412 }
1413
1414 // Now we know that one of our operands is on the top of the stack, and at
1415 // least one of our operands is killed by this instruction.
1416 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1417 "Stack conditions not set up right!");
1418
1419 // We decide which form to use based on what is on the top of the stack, and
1420 // which operand is killed by this instruction.
1421 ArrayRef<TableEntry> InstTable;
1422 bool isForward = TOS == Op0;
1423 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1424 if (updateST0) {
1425 if (isForward)
1426 InstTable = ForwardST0Table;
1427 else
1428 InstTable = ReverseST0Table;
1429 } else {
1430 if (isForward)
1431 InstTable = ForwardSTiTable;
1432 else
1433 InstTable = ReverseSTiTable;
1434 }
1435
1436 int Opcode = Lookup(InstTable, MI.getOpcode());
1437 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1438
1439 // NotTOS - The register which is not on the top of stack...
1440 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1441
1442 // Replace the old instruction with a new instruction
1443 MBB->remove(&*I++);
1444 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1445
1446 if (!MI.mayRaiseFPException())
1447 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1448
1449 // If both operands are killed, pop one off of the stack in addition to
1450 // overwriting the other one.
1451 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1452 assert(!updateST0 && "Should have updated other operand!");
1453 popStackAfter(I); // Pop the top of stack
1454 }
1455
1456 // Update stack information so that we know the destination register is now on
1457 // the stack.
1458 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1459 assert(UpdatedSlot < StackTop && Dest < 7);
1460 Stack[UpdatedSlot] = Dest;
1461 RegMap[Dest] = UpdatedSlot;
1462 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1463}
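// For example, "FP2 = fadd FP0<kill>, FP3" with FP0 already on top picks
// ForwardST0Table (ADD_Fp -> ADD_FST0r): the add overwrites ST(0), no pop is
// needed because FP3 stays live, and the model renames the top slot from FP0
// to FP2.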
1464
1465/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1466/// register arguments and no explicit destinations.
1467///
1468void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1469 MachineInstr &MI = *I;
1470
1471 unsigned NumOperands = MI.getDesc().getNumOperands();
1472 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1473 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1474 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1475 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, /*TRI=*/nullptr);
1476 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1477
1478 // Make sure the first operand is on the top of stack, the other one can be
1479 // anywhere.
1480 moveToTop(Op0, I);
1481
1482 // Change from the pseudo instruction to the concrete instruction.
1483 MI.getOperand(0).setReg(getSTReg(Op1));
1484 MI.removeOperand(1);
1485 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1486 MI.dropDebugNumber();
1487
1488 // If any of the operands are killed by this instruction, free them.
1489 if (KillsOp0) freeStackSlotAfter(I, Op0);
1490 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1491}
1492
1493/// handleCondMovFP - Handle two address conditional move instructions. These
1494/// instructions move a st(i) register to st(0) iff a condition is true. These
1495/// instructions require that the first operand is at the top of the stack, but
1496/// otherwise don't modify the stack at all.
1497void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1498 MachineInstr &MI = *I;
1499
1500 unsigned Op0 = getFPReg(MI.getOperand(0));
1501 unsigned Op1 = getFPReg(MI.getOperand(2));
1502 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, /*TRI=*/nullptr);
1503
1504 // The first operand *must* be on the top of the stack.
1505 moveToTop(Op0, I);
1506
1507 // Change the second operand to the stack register that the operand is in.
1508 // Change from the pseudo instruction to the concrete instruction.
1509 MI.removeOperand(0);
1510 MI.removeOperand(1);
1511 MI.getOperand(0).setReg(getSTReg(Op1));
1512 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1513 MI.dropDebugNumber();
1514
1515 // If we kill the second operand, make sure to pop it from the stack.
1516 if (Op0 != Op1 && KillsOp1) {
1517 // Get this value off of the register stack.
1518 freeStackSlotAfter(I, Op1);
1519 }
1520}
1521
1522
1523/// handleSpecialFP - Handle special instructions which behave unlike other
1524/// floating point instructions. This is primarily intended for use by pseudo
1525/// instructions.
1526///
1527void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1528 MachineInstr &MI = *Inst;
1529
1530 if (MI.isCall()) {
1531 handleCall(Inst);
1532 return;
1533 }
1534
1535 if (MI.isReturn()) {
1536 handleReturn(Inst);
1537 return;
1538 }
1539
1540 switch (MI.getOpcode()) {
1541 default: llvm_unreachable("Unknown SpecialFP instruction!");
1542 case TargetOpcode::COPY: {
1543 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1544 const MachineOperand &MO1 = MI.getOperand(1);
1545 const MachineOperand &MO0 = MI.getOperand(0);
1546 bool KillsSrc = MI.killsRegister(MO1.getReg(), /*TRI=*/nullptr);
1547
1548 // FP <- FP copy.
1549 unsigned DstFP = getFPReg(MO0);
1550 unsigned SrcFP = getFPReg(MO1);
1551 assert(isLive(SrcFP) && "Cannot copy dead register");
1552 if (KillsSrc) {
1553 // If the input operand is killed, we can just change the owner of the
1554 // incoming stack slot into the result.
1555 unsigned Slot = getSlot(SrcFP);
1556 Stack[Slot] = DstFP;
1557 RegMap[DstFP] = Slot;
1558 } else {
1559 // For COPY we just duplicate the specified value to a new stack slot.
1560 // This could be made better, but would require substantial changes.
1561 duplicateToTop(SrcFP, DstFP, Inst);
1562 }
1563 break;
1564 }
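  // Illustrative sketch: a pseudo copy `%fp2 = COPY killed %fp0` is handled
  // without emitting any code by relabelling FP0's current stack slot as
  // FP2, whereas the non-killed form keeps FP0 live and materializes FP2 by
  // pushing a duplicate of the source's ST slot (duplicateToTop, an fld).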
1565
1566 case TargetOpcode::IMPLICIT_DEF: {
1567 // All FP registers must be explicitly defined, so load a 0 instead.
1568 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1569 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1570 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1571 pushReg(Reg);
1572 break;
1573 }
1574
1575 case TargetOpcode::INLINEASM:
1576 case TargetOpcode::INLINEASM_BR: {
1577 // The inline asm MachineInstr currently only *uses* FP registers for the
1578 // 'f' constraint. These should be turned into the current ST(x) register
1579 // in the machine instr.
1580 //
1581 // There are special rules for x87 inline assembly. The compiler must know
1582 // exactly how many registers are popped and pushed implicitly by the asm.
1583 // Otherwise it is not possible to restore the stack state after the inline
1584 // asm.
1585 //
1586 // There are 3 kinds of input operands:
1587 //
1588 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1589 // popped input operand must be in a fixed stack slot, and it is either
1590 // tied to an output operand, or in the clobber list. The MI has ST use
1591 // and def operands for these inputs.
1592 //
1593 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1594 // preserved by the inline asm. The fixed stack slots must be STn-STm
1595 // following the popped inputs. A fixed input operand cannot be tied to
1596 // an output or appear in the clobber list. The MI has ST use operands
1597 // and no defs for these inputs.
1598 //
1599 // 3. Preserved inputs. These inputs use the "f" constraint which is
1600 // represented as an FP register. The inline asm won't change these
1601 // stack slots.
1602 //
1603 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1604 // registers do not count as output operands. The inline asm changes the
1605 // stack as if it popped all the popped inputs and then pushed all the
1606 // output operands.
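    // Illustrative sketch (source-level constraints, not something this pass
    // emits): a GCC-style x87 asm such as
    //   asm("fpatan" : "=t"(r) : "0"(y), "u"(x) : "st(1)");
    // arrives here with two popped inputs: the ST0 input tied to the "=t"
    // output, and the ST1 input that also appears in the "st(1)" clobber
    // list. There are no fixed inputs, and an input written with the "f"
    // constraint instead would still be an FP register here and fall into
    // category 3.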
1607
1608 // Scan the assembly for ST registers used, defined and clobbered. We can
1609 // only tell clobbers from defs by looking at the asm descriptor.
1610 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1611 unsigned NumOps = 0;
1612 SmallSet<unsigned, 1> FRegIdx;
1613 unsigned RCID;
1614
1615 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1616 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1617 unsigned Flags = MI.getOperand(i).getImm();
1618 const InlineAsm::Flag F(Flags);
1619
1620 NumOps = F.getNumOperandRegisters();
1621 if (NumOps != 1)
1622 continue;
1623 const MachineOperand &MO = MI.getOperand(i + 1);
1624 if (!MO.isReg())
1625 continue;
1626 unsigned STReg = MO.getReg() - X86::FP0;
1627 if (STReg >= 8)
1628 continue;
1629
1630 // If the flag has a register class constraint, this must be an operand
1631 // with constraint "f". Record its index and continue.
1632 if (F.hasRegClassConstraint(RCID)) {
1633 FRegIdx.insert(i + 1);
1634 continue;
1635 }
1636
1637 switch (F.getKind()) {
1638 case InlineAsm::Kind::RegUse:
1639 STUses |= (1u << STReg);
1640 break;
1641 case InlineAsm::Kind::RegDef:
1642 case InlineAsm::Kind::RegDefEarlyClobber:
1643 STDefs |= (1u << STReg);
1644 break;
1645 case InlineAsm::Kind::Clobber:
1646 STClobbers |= (1u << STReg);
1647 break;
1648 default:
1649 break;
1650 }
1651 }
1652
1653    if (STUses && !isMask_32(STUses))
1654      MI.emitGenericError("fixed input regs must be last on the x87 stack");
1655    unsigned NumSTUses = llvm::countr_one(STUses);
1656
1657 // Defs must be contiguous from the stack top. ST0-STn.
1658 if (STDefs && !isMask_32(STDefs)) {
1659 MI.emitGenericError("output regs must be last on the x87 stack");
1660 STDefs = NextPowerOf2(STDefs) - 1;
1661 }
1662 unsigned NumSTDefs = llvm::countr_one(STDefs);
1663
1664 // So must the clobbered stack slots. ST0-STm, m >= n.
1665 if (STClobbers && !isMask_32(STDefs | STClobbers))
1666 MI.emitGenericError("clobbers must be last on the x87 stack");
1667
1668 // Popped inputs are the ones that are also clobbered or defined.
1669 unsigned STPopped = STUses & (STDefs | STClobbers);
1670 if (STPopped && !isMask_32(STPopped))
1671 MI.emitGenericError(
1672 "implicitly popped regs must be last on the x87 stack");
1673 unsigned NumSTPopped = llvm::countr_one(STPopped);
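    // Worked example, continuing the sketch above (mask values are
    // illustrative): uses of ST0 and ST1, a def of ST0 and a clobber of ST1
    // give
    //   STUses = 0b011, STDefs = 0b001, STClobbers = 0b010,
    //   STPopped = STUses & (STDefs | STClobbers) = 0b011,
    // all contiguous masks starting at ST0, so every isMask_32 check passes
    // with NumSTUses = 2, NumSTDefs = 1 and NumSTPopped = 2.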
1674
1675 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1676 << NumSTPopped << ", and defines " << NumSTDefs
1677 << " regs.\n");
1678
1679#ifndef NDEBUG
1680 // If any input operand uses constraint "f", all output register
1681 // constraints must be early-clobber defs.
1682 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1683 if (FRegIdx.count(I)) {
1684 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1685 "Operands with constraint \"f\" cannot overlap with defs");
1686 }
1687#endif
1688
1689 // Collect all FP registers (register operands with constraints "t", "u",
1690    // and "f") to kill after the instruction.
1691 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1692 for (const MachineOperand &Op : MI.operands()) {
1693 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1694 continue;
1695 unsigned FPReg = getFPReg(Op);
1696
1697 // If we kill this operand, make sure to pop it from the stack after the
1698 // asm. We just remember it for now, and pop them all off at the end in
1699 // a batch.
1700 if (Op.isUse() && Op.isKill())
1701 FPKills |= 1U << FPReg;
1702 }
1703
1704 // Do not include registers that are implicitly popped by defs/clobbers.
1705 FPKills &= ~(STDefs | STClobbers);
1706
1707 // Now we can rearrange the live registers to match what was requested.
1708 unsigned char STUsesArray[8];
1709
1710 for (unsigned I = 0; I < NumSTUses; ++I)
1711 STUsesArray[I] = I;
1712
1713 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1714 LLVM_DEBUG({
1715 dbgs() << "Before asm: ";
1716 dumpStack();
1717 });
1718
1719 // With the stack layout fixed, rewrite the FP registers.
1720 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1721 MachineOperand &Op = MI.getOperand(i);
1722 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1723 continue;
1724
1725 unsigned FPReg = getFPReg(Op);
1726
1727 if (FRegIdx.count(i))
1728 // Operand with constraint "f".
1729 Op.setReg(getSTReg(FPReg));
1730 else
1731 // Operand with a single register class constraint ("t" or "u").
1732 Op.setReg(X86::ST0 + FPReg);
1733 }
1734
1735 // Simulate the inline asm popping its inputs and pushing its outputs.
1736 StackTop -= NumSTPopped;
1737
1738 for (unsigned i = 0; i < NumSTDefs; ++i)
1739 pushReg(NumSTDefs - i - 1);
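    // Continuing the sketch: both popped inputs come off the pseudo stack
    // (StackTop -= 2) and a single output is pushed back, so the modelled
    // stack ends up one entry shallower, matching the net pop that the asm
    // itself performs.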
1740
1741 // If this asm kills any FP registers (is the last use of them) we must
1742 // explicitly emit pop instructions for them. Do this now after the asm has
1743 // executed so that the ST(x) numbers are not off (which would happen if we
1744 // did this inline with operand rewriting).
1745 //
1746 // Note: this might be a non-optimal pop sequence. We might be able to do
1747 // better by trying to pop in stack order or something.
1748 while (FPKills) {
1749 unsigned FPReg = llvm::countr_zero(FPKills);
1750 if (isLive(FPReg))
1751 freeStackSlotAfter(Inst, FPReg);
1752 FPKills &= ~(1U << FPReg);
1753 }
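    // Continuing the sketch: if the asm had also taken an "f"-constrained
    // operand, say FP4, as its last use, FP4 would not be in STDefs or
    // STClobbers, so its stack slot is freed here, after the asm, where the
    // ST numbering used inside the asm can no longer be disturbed.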
1754
1755 // Don't delete the inline asm!
1756 return;
1757 }
1758
1759 // FAKE_USE must pop its register operand off the stack if it is killed,
1760 // because this constitutes the register's last use. If the operand
1761 // is not killed, it will have its last use later, so we leave it alone.
1762 // In either case we remove the operand so later passes don't see it.
1763 case TargetOpcode::FAKE_USE: {
1764 assert(MI.getNumExplicitOperands() == 1 &&
1765 "FAKE_USE must have exactly one operand");
1766 if (MI.getOperand(0).isKill()) {
1767 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
1768 }
1769 MI.removeOperand(0);
1770 return;
1771 }
1772 }
1773
1774 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1775
1776 // We want to leave I pointing to the previous instruction, but what if we
1777 // just erased the first instruction?
1778 if (Inst == MBB->begin()) {
1779 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1780 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1781 } else
1782 --Inst;
1783}
1784
1785void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1786  const TargetRegisterInfo &TRI =
1787      *MBB.getParent()->getSubtarget().getRegisterInfo();
1788  LiveRegUnits LPR(TRI);
1789
1790 LPR.addLiveOuts(MBB);
1791
1792 for (MachineInstr &MI : llvm::reverse(MBB)) {
1793 if (MI.isDebugInstr())
1794 continue;
1795
1796    std::bitset<8> Defs;
1797    SmallVector<MachineOperand *, 2> Uses;
1798
1799 for (auto &MO : MI.operands()) {
1800 if (!MO.isReg())
1801 continue;
1802
1803 unsigned Reg = MO.getReg() - X86::FP0;
1804
1805 if (Reg >= 8)
1806 continue;
1807
1808 if (MO.isDef()) {
1809 Defs.set(Reg);
1810 if (LPR.available(MO.getReg()))
1811 MO.setIsDead();
1812 } else
1813 Uses.push_back(&MO);
1814 }
1815
1816 for (auto *MO : Uses)
1817 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1818 MO->setIsKill();
1819
1820 LPR.stepBackward(MI);
1821 }
1822}
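// Illustrative sketch (pseudo opcode and registers are hypothetical): for a
// block whose last FP instruction is
//   %fp0 = ADD_Fp80 %fp0, %fp1
// with only FP0 live out, the backward scan keeps the def of FP0 (not dead),
// marks the FP0 use as killed because the same instruction redefines it, and
// marks the FP1 use as killed because FP1 is not live afterwards.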