// Doxygen source listing of llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
// (LLVM 22.0.0git). Navigation chrome removed.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
37#include "llvm/IR/Module.h"
38#include "llvm/MC/MCInst.h"
41#include "llvm/Support/Debug.h"
44
45using namespace llvm;
46
47#define DEBUG_TYPE "ppc-instr-info"
48
49#define GET_INSTRMAP_INFO
50#define GET_INSTRINFO_CTOR_DTOR
51#include "PPCGenInstrInfo.inc"
52
53STATISTIC(NumStoreSPILLVSRRCAsVec,
54 "Number of spillvsrrc spilled to stack as vec");
55STATISTIC(NumStoreSPILLVSRRCAsGpr,
56 "Number of spillvsrrc spilled to stack as gpr");
57STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
58STATISTIC(CmpIselsConverted,
59 "Number of ISELs that depend on comparison of constants converted");
60STATISTIC(MissedConvertibleImmediateInstrs,
61 "Number of compare-immediate instructions fed by constants");
62STATISTIC(NumRcRotatesConvertedToRcAnd,
63 "Number of record-form rotates converted to record-form andi");
64
65static cl::
66opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
67 cl::desc("Disable analysis for CTR loops"));
68
69static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
70cl::desc("Disable compare instruction optimization"), cl::Hidden);
71
72static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
73cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
75
76static cl::opt<bool>
77UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
78 cl::desc("Use the old (incorrect) instruction latency calculation"));
79
80static cl::opt<float>
81 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
82 cl::desc("register pressure factor for the transformations."));
83
85 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
86 cl::desc("enable register pressure reduce in machine combiner pass."));
87
88// Pin the vtable to this file.
89void PPCInstrInfo::anchor() {}
90
92 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
93 /* CatchRetOpcode */ -1,
94 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
95 Subtarget(STI), RI(STI.getTargetMachine()) {}
96
97/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
98/// this target when scheduling the DAG.
101 const ScheduleDAG *DAG) const {
102 unsigned Directive =
103 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
106 const InstrItineraryData *II =
107 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
108 return new ScoreboardHazardRecognizer(II, DAG);
109 }
110
112}
113
114/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
115/// to use for this target when scheduling the DAG.
118 const ScheduleDAG *DAG) const {
119 unsigned Directive =
120 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
121
122 // FIXME: Leaving this as-is until we have POWER9 scheduling info
124 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
125
126 // Most subtargets use a PPC970 recognizer.
129 assert(DAG->TII && "No InstrInfo?");
130
131 return new PPCHazardRecognizer970(*DAG);
132 }
133
134 return new ScoreboardHazardRecognizer(II, DAG);
135}
136
138 const MachineInstr &MI,
139 unsigned *PredCost) const {
140 if (!ItinData || UseOldLatencyCalc)
141 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
142
143 // The default implementation of getInstrLatency calls getStageLatency, but
144 // getStageLatency does not do the right thing for us. While we have
145 // itinerary, most cores are fully pipelined, and so the itineraries only
146 // express the first part of the pipeline, not every stage. Instead, we need
147 // to use the listed output operand cycle number (using operand 0 here, which
148 // is an output).
149
150 unsigned Latency = 1;
151 unsigned DefClass = MI.getDesc().getSchedClass();
152 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
153 const MachineOperand &MO = MI.getOperand(i);
154 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
155 continue;
156
157 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
158 if (!Cycle)
159 continue;
160
161 Latency = std::max(Latency, *Cycle);
162 }
163
164 return Latency;
165}
166
167std::optional<unsigned> PPCInstrInfo::getOperandLatency(
168 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
169 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
170 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
171 ItinData, DefMI, DefIdx, UseMI, UseIdx);
172
173 if (!DefMI.getParent())
174 return Latency;
175
176 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
177 Register Reg = DefMO.getReg();
178
179 bool IsRegCR;
180 if (Reg.isVirtual()) {
181 const MachineRegisterInfo *MRI =
182 &DefMI.getParent()->getParent()->getRegInfo();
183 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
184 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
185 } else {
186 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
187 PPC::CRBITRCRegClass.contains(Reg);
188 }
189
190 if (UseMI.isBranch() && IsRegCR) {
191 if (!Latency)
192 Latency = getInstrLatency(ItinData, DefMI);
193
194 // On some cores, there is an additional delay between writing to a condition
195 // register, and using it from a branch.
196 unsigned Directive = Subtarget.getCPUDirective();
197 switch (Directive) {
198 default: break;
199 case PPC::DIR_7400:
200 case PPC::DIR_750:
201 case PPC::DIR_970:
202 case PPC::DIR_E5500:
203 case PPC::DIR_PWR4:
204 case PPC::DIR_PWR5:
205 case PPC::DIR_PWR5X:
206 case PPC::DIR_PWR6:
207 case PPC::DIR_PWR6X:
208 case PPC::DIR_PWR7:
209 case PPC::DIR_PWR8:
210 // FIXME: Is this needed for POWER9?
211 Latency = *Latency + 2;
212 break;
213 }
214 }
215
216 return Latency;
217}
218
220 uint32_t Flags) const {
221 MI.setFlags(Flags);
225}
226
227// This function does not list all associative and commutative operations, but
228// only those worth feeding through the machine combiner in an attempt to
229// reduce the critical path. Mostly, this means floating-point operations,
230// because they have high latencies(>=5) (compared to other operations, such as
231// and/or, which are also associative and commutative, but have low latencies).
233 bool Invert) const {
234 if (Invert)
235 return false;
236 switch (Inst.getOpcode()) {
237 // Floating point:
238 // FP Add:
239 case PPC::FADD:
240 case PPC::FADDS:
241 // FP Multiply:
242 case PPC::FMUL:
243 case PPC::FMULS:
244 // Altivec Add:
245 case PPC::VADDFP:
246 // VSX Add:
247 case PPC::XSADDDP:
248 case PPC::XVADDDP:
249 case PPC::XVADDSP:
250 case PPC::XSADDSP:
251 // VSX Multiply:
252 case PPC::XSMULDP:
253 case PPC::XVMULDP:
254 case PPC::XVMULSP:
255 case PPC::XSMULSP:
258 // Fixed point:
259 // Multiply:
260 case PPC::MULHD:
261 case PPC::MULLD:
262 case PPC::MULHW:
263 case PPC::MULLW:
264 return true;
265 default:
266 return false;
267 }
268}
269
270#define InfoArrayIdxFMAInst 0
271#define InfoArrayIdxFAddInst 1
272#define InfoArrayIdxFMULInst 2
273#define InfoArrayIdxAddOpIdx 3
274#define InfoArrayIdxMULOpIdx 4
275#define InfoArrayIdxFSubInst 5
276// Array keeps info for FMA instructions:
277// Index 0(InfoArrayIdxFMAInst): FMA instruction;
278// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
279// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
280// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
281// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
282// second MUL operand index is plus 1;
283// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
284static const uint16_t FMAOpIdxInfo[][6] = {
285 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
286 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
287 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
288 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
289 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
290 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
291 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
292
293// Check if an opcode is a FMA instruction. If it is, return the index in array
294// FMAOpIdxInfo. Otherwise, return -1.
295int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
296 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
297 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
298 return I;
299 return -1;
300}
301
302// On PowerPC target, we have two kinds of patterns related to FMA:
303// 1: Improve ILP.
304// Try to reassociate FMA chains like below:
305//
306// Pattern 1:
307// A = FADD X, Y (Leaf)
308// B = FMA A, M21, M22 (Prev)
309// C = FMA B, M31, M32 (Root)
310// -->
311// A = FMA X, M21, M22
312// B = FMA Y, M31, M32
313// C = FADD A, B
314//
315// Pattern 2:
316// A = FMA X, M11, M12 (Leaf)
317// B = FMA A, M21, M22 (Prev)
318// C = FMA B, M31, M32 (Root)
319// -->
320// A = FMUL M11, M12
321// B = FMA X, M21, M22
322// D = FMA A, M31, M32
323// C = FADD B, D
324//
325// breaking the dependency between A and B, allowing FMA to be executed in
326// parallel (or back-to-back in a pipeline) instead of depending on each other.
327//
328// 2: Reduce register pressure.
329// Try to reassociate FMA with FSUB and a constant like below:
330// C is a floating point const.
331//
332// Pattern 1:
333// A = FSUB X, Y (Leaf)
334// D = FMA B, C, A (Root)
335// -->
336// A = FMA B, Y, -C
337// D = FMA A, X, C
338//
339// Pattern 2:
340// A = FSUB X, Y (Leaf)
341// D = FMA B, A, C (Root)
342// -->
343// A = FMA B, Y, -C
344// D = FMA A, X, C
345//
346// Before the transformation, A must be assigned with different hardware
347// register with D. After the transformation, A and D must be assigned with
348// same hardware register due to TIE attribute of FMA instructions.
349//
352 bool DoRegPressureReduce) const {
356
357 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
358 for (const auto &MO : Instr.explicit_operands())
359 if (!(MO.isReg() && MO.getReg().isVirtual()))
360 return false;
361 return true;
362 };
363
364 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
365 unsigned OpType) {
366 if (Instr.getOpcode() !=
367 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
368 return false;
369
370 // Instruction can be reassociated.
371 // fast math flags may prohibit reassociation.
372 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
373 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
374 return false;
375
376 // Instruction operands are virtual registers for reassociation.
377 if (!IsAllOpsVirtualReg(Instr))
378 return false;
379
380 // For register pressure reassociation, the FSub must have only one use as
381 // we want to delete the sub to save its def.
382 if (OpType == InfoArrayIdxFSubInst &&
383 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
384 return false;
385
386 return true;
387 };
388
389 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
390 int16_t &MulOpIdx, bool IsLeaf) {
391 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
392 if (Idx < 0)
393 return false;
394
395 // Instruction can be reassociated.
396 // fast math flags may prohibit reassociation.
397 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
398 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
399 return false;
400
401 // Instruction operands are virtual registers for reassociation.
402 if (!IsAllOpsVirtualReg(Instr))
403 return false;
404
406 if (IsLeaf)
407 return true;
408
410
411 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
412 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
413 // If 'add' operand's def is not in current block, don't do ILP related opt.
414 if (!MIAdd || MIAdd->getParent() != MBB)
415 return false;
416
417 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
418 // as this fma will be changed later.
419 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
420 };
421
422 int16_t AddOpIdx = -1;
423 int16_t MulOpIdx = -1;
424
425 bool IsUsedOnceL = false;
426 bool IsUsedOnceR = false;
427 MachineInstr *MULInstrL = nullptr;
428 MachineInstr *MULInstrR = nullptr;
429
430 auto IsRPReductionCandidate = [&]() {
431 // Currently, we only support float and double.
432 // FIXME: add support for other types.
433 unsigned Opcode = Root.getOpcode();
434 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
435 return false;
436
437 // Root must be a valid FMA like instruction.
438 // Treat it as leaf as we don't care its add operand.
439 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
440 assert((MulOpIdx >= 0) && "mul operand index not right!");
441 Register MULRegL = TRI->lookThruSingleUseCopyChain(
442 Root.getOperand(MulOpIdx).getReg(), MRI);
443 Register MULRegR = TRI->lookThruSingleUseCopyChain(
444 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
445 if (!MULRegL && !MULRegR)
446 return false;
447
448 if (MULRegL && !MULRegR) {
449 MULRegR =
450 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
451 IsUsedOnceL = true;
452 } else if (!MULRegL && MULRegR) {
453 MULRegL =
454 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
455 IsUsedOnceR = true;
456 } else {
457 IsUsedOnceL = true;
458 IsUsedOnceR = true;
459 }
460
461 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
462 return false;
463
464 MULInstrL = MRI->getVRegDef(MULRegL);
465 MULInstrR = MRI->getVRegDef(MULRegR);
466 return true;
467 }
468 return false;
469 };
470
471 // Register pressure fma reassociation patterns.
472 if (DoRegPressureReduce && IsRPReductionCandidate()) {
473 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
474 // Register pressure pattern 1
475 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
476 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
477 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
479 return true;
480 }
481
482 // Register pressure pattern 2
483 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
484 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
485 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
487 return true;
488 }
489 }
490
491 // ILP fma reassociation patterns.
492 // Root must be a valid FMA like instruction.
493 AddOpIdx = -1;
494 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
495 return false;
496
497 assert((AddOpIdx >= 0) && "add operand index not right!");
498
499 Register RegB = Root.getOperand(AddOpIdx).getReg();
500 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
501
502 // Prev must be a valid FMA like instruction.
503 AddOpIdx = -1;
504 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
505 return false;
506
507 assert((AddOpIdx >= 0) && "add operand index not right!");
508
509 Register RegA = Prev->getOperand(AddOpIdx).getReg();
510 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
511 AddOpIdx = -1;
512 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
514 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
515 return true;
516 }
517 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
519 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
520 return true;
521 }
522 return false;
523}
524
526 MachineInstr &Root, unsigned &Pattern,
527 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
528 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
529
530 MachineFunction *MF = Root.getMF();
534
535 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
536 if (Idx < 0)
537 return;
538
540
541 // For now we only need to fix up placeholder for register pressure reduce
542 // patterns.
543 Register ConstReg = 0;
544 switch (Pattern) {
546 ConstReg =
547 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
548 break;
550 ConstReg =
551 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
552 break;
553 default:
554 // Not register pressure reduce patterns.
555 return;
556 }
557
558 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
559 // Get const value from const pool.
560 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
561 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
562
563 // Get negative fp const.
564 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
565 F1.changeSign();
566 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
567 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
568
569 // Put negative fp const into constant pool.
570 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
571
572 MachineOperand *Placeholder = nullptr;
573 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
574 for (auto *Inst : InsInstrs) {
575 for (MachineOperand &Operand : Inst->explicit_operands()) {
576 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
577 if (Operand.getReg() == PPC::ZERO8) {
578 Placeholder = &Operand;
579 break;
580 }
581 }
582 }
583
584 assert(Placeholder && "Placeholder does not exist!");
585
586 // Generate instructions to load the const fp from constant pool.
587 // We only support PPC64 and medium code model.
588 Register LoadNewConst =
589 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
590
591 // Fill the placeholder with the new load from constant pool.
592 Placeholder->setReg(LoadNewConst);
593}
594
596 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
597
599 return false;
600
601 // Currently, we only enable register pressure reducing in machine combiner
602 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
603 // support.
604 //
605 // So we need following instructions to access a TOC entry:
606 //
607 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
608 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
609 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
610 //
611 // FIXME: add more supported targets, like Small and Large code model, PPC32,
612 // AIX.
613 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
615 return false;
616
618 const MachineFunction *MF = MBB->getParent();
619 const MachineRegisterInfo *MRI = &MF->getRegInfo();
620
621 auto GetMBBPressure =
622 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
623 RegionPressure Pressure;
624 RegPressureTracker RPTracker(Pressure);
625
626 // Initialize the register pressure tracker.
627 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
628 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
629
630 for (const auto &MI : reverse(*MBB)) {
631 if (MI.isDebugValue() || MI.isDebugLabel())
632 continue;
633 RegisterOperands RegOpers;
634 RegOpers.collect(MI, *TRI, *MRI, false, false);
635 RPTracker.recedeSkipDebugValues();
636 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
637 RPTracker.recede(RegOpers);
638 }
639
640 // Close the RPTracker to finalize live ins.
641 RPTracker.closeRegion();
642
643 return RPTracker.getPressure().MaxSetPressure;
644 };
645
646 // For now we only care about float and double type fma.
647 unsigned VSSRCLimit =
648 RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
649
650 // Only reduce register pressure when pressure is high.
651 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
652 (float)VSSRCLimit * FMARPFactor;
653}
654
656 // I has only one memory operand which is load from constant pool.
657 if (!I->hasOneMemOperand())
658 return false;
659
660 MachineMemOperand *Op = I->memoperands()[0];
661 return Op->isLoad() && Op->getPseudoValue() &&
662 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
663}
664
665Register PPCInstrInfo::generateLoadForNewConst(
666 unsigned Idx, MachineInstr *MI, Type *Ty,
667 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
668 // Now we only support PPC64, Medium code model and P9 with vector.
669 // We have immutable pattern to access const pool. See function
670 // shouldReduceRegisterPressure.
671 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
673 "Target not supported!\n");
674
675 MachineFunction *MF = MI->getMF();
677
678 // Generate ADDIStocHA8
679 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
680 MachineInstrBuilder TOCOffset =
681 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
682 .addReg(PPC::X2)
684
685 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
686 "Only float and double are supported!");
687
688 unsigned LoadOpcode;
689 // Should be float type or double type.
690 if (Ty->isFloatTy())
691 LoadOpcode = PPC::DFLOADf32;
692 else
693 LoadOpcode = PPC::DFLOADf64;
694
695 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
696 Register VReg2 = MRI->createVirtualRegister(RC);
700
701 // Generate Load from constant pool.
703 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
705 .addReg(VReg1, getKillRegState(true))
706 .addMemOperand(MMO);
707
708 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
709
710 // Insert the toc load instructions into InsInstrs.
711 InsInstrs.insert(InsInstrs.begin(), Load);
712 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
713 return VReg2;
714}
715
716// This function returns the const value in constant pool if the \p I is a load
717// from constant pool.
718const Constant *
720 MachineFunction *MF = I->getMF();
723 assert(I->mayLoad() && "Should be a load instruction.\n");
724 for (auto MO : I->uses()) {
725 if (!MO.isReg())
726 continue;
727 Register Reg = MO.getReg();
728 if (Reg == 0 || !Reg.isVirtual())
729 continue;
730 // Find the toc address.
731 MachineInstr *DefMI = MRI->getVRegDef(Reg);
732 for (auto MO2 : DefMI->uses())
733 if (MO2.isCPI())
734 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
735 }
736 return nullptr;
737}
738
740 switch (Pattern) {
747 default:
749 }
750}
751
754 bool DoRegPressureReduce) const {
755 // Using the machine combiner in this way is potentially expensive, so
756 // restrict to when aggressive optimizations are desired.
758 return false;
759
760 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
761 return true;
762
764 DoRegPressureReduce);
765}
766
768 MachineInstr &Root, unsigned Pattern,
771 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
772 switch (Pattern) {
777 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
778 break;
779 default:
780 // Reassociate default patterns.
782 DelInstrs, InstrIdxForVirtReg);
783 break;
784 }
785}
786
787void PPCInstrInfo::reassociateFMA(
788 MachineInstr &Root, unsigned Pattern,
791 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
792 MachineFunction *MF = Root.getMF();
795 MachineOperand &OpC = Root.getOperand(0);
796 Register RegC = OpC.getReg();
797 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
798 MRI.constrainRegClass(RegC, RC);
799
800 unsigned FmaOp = Root.getOpcode();
801 int16_t Idx = getFMAOpIdxInfo(FmaOp);
802 assert(Idx >= 0 && "Root must be a FMA instruction");
803
804 bool IsILPReassociate =
807
810
811 MachineInstr *Prev = nullptr;
812 MachineInstr *Leaf = nullptr;
813 switch (Pattern) {
814 default:
815 llvm_unreachable("not recognized pattern!");
818 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
819 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
820 break;
822 Register MULReg =
823 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
824 Leaf = MRI.getVRegDef(MULReg);
825 break;
826 }
828 Register MULReg = TRI->lookThruCopyLike(
829 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
830 Leaf = MRI.getVRegDef(MULReg);
831 break;
832 }
833 }
834
835 uint32_t IntersectedFlags = 0;
836 if (IsILPReassociate)
837 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
838 else
839 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
840
841 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
842 bool &KillFlag) {
843 Reg = Operand.getReg();
844 MRI.constrainRegClass(Reg, RC);
845 KillFlag = Operand.isKill();
846 };
847
848 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
849 Register &MulOp2, Register &AddOp,
850 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
851 bool &AddOpKillFlag) {
852 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
854 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
855 };
856
857 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
858 RegA21, RegB;
859 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
860 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
861 KillA11 = false, KillA21 = false, KillB = false;
862
863 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
864
865 if (IsILPReassociate)
866 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
867
869 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
870 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
872 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
874 } else {
875 // Get FSUB instruction info.
876 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
877 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
878 }
879
880 // Create new virtual registers for the new results instead of
881 // recycling legacy ones because the MachineCombiner's computation of the
882 // critical path requires a new register definition rather than an existing
883 // one.
884 // For register pressure reassociation, we only need create one virtual
885 // register for the new fma.
886 Register NewVRA = MRI.createVirtualRegister(RC);
887 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
888
889 Register NewVRB = 0;
890 if (IsILPReassociate) {
891 NewVRB = MRI.createVirtualRegister(RC);
892 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
893 }
894
895 Register NewVRD = 0;
897 NewVRD = MRI.createVirtualRegister(RC);
898 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
899 }
900
901 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
902 Register RegMul1, bool KillRegMul1,
903 Register RegMul2, bool KillRegMul2) {
904 MI->getOperand(AddOpIdx).setReg(RegAdd);
905 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
906 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
907 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
908 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
909 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
910 };
911
912 MachineInstrBuilder NewARegPressure, NewCRegPressure;
913 switch (Pattern) {
914 default:
915 llvm_unreachable("not recognized pattern!");
917 // Create new instructions for insertion.
918 MachineInstrBuilder MINewB =
919 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
920 .addReg(RegX, getKillRegState(KillX))
921 .addReg(RegM21, getKillRegState(KillM21))
922 .addReg(RegM22, getKillRegState(KillM22));
923 MachineInstrBuilder MINewA =
924 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
925 .addReg(RegY, getKillRegState(KillY))
926 .addReg(RegM31, getKillRegState(KillM31))
927 .addReg(RegM32, getKillRegState(KillM32));
928 // If AddOpIdx is not 1, adjust the order.
929 if (AddOpIdx != 1) {
930 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
931 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
932 }
933
934 MachineInstrBuilder MINewC =
935 BuildMI(*MF, Root.getDebugLoc(),
937 .addReg(NewVRB, getKillRegState(true))
938 .addReg(NewVRA, getKillRegState(true));
939
940 // Update flags for newly created instructions.
941 setSpecialOperandAttr(*MINewA, IntersectedFlags);
942 setSpecialOperandAttr(*MINewB, IntersectedFlags);
943 setSpecialOperandAttr(*MINewC, IntersectedFlags);
944
945 // Record new instructions for insertion.
946 InsInstrs.push_back(MINewA);
947 InsInstrs.push_back(MINewB);
948 InsInstrs.push_back(MINewC);
949 break;
950 }
952 assert(NewVRD && "new FMA register not created!");
953 // Create new instructions for insertion.
954 MachineInstrBuilder MINewA =
955 BuildMI(*MF, Leaf->getDebugLoc(),
957 .addReg(RegM11, getKillRegState(KillM11))
958 .addReg(RegM12, getKillRegState(KillM12));
959 MachineInstrBuilder MINewB =
960 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
961 .addReg(RegX, getKillRegState(KillX))
962 .addReg(RegM21, getKillRegState(KillM21))
963 .addReg(RegM22, getKillRegState(KillM22));
964 MachineInstrBuilder MINewD =
965 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
966 .addReg(NewVRA, getKillRegState(true))
967 .addReg(RegM31, getKillRegState(KillM31))
968 .addReg(RegM32, getKillRegState(KillM32));
969 // If AddOpIdx is not 1, adjust the order.
970 if (AddOpIdx != 1) {
971 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
972 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
973 KillM32);
974 }
975
976 MachineInstrBuilder MINewC =
977 BuildMI(*MF, Root.getDebugLoc(),
979 .addReg(NewVRB, getKillRegState(true))
980 .addReg(NewVRD, getKillRegState(true));
981
982 // Update flags for newly created instructions.
983 setSpecialOperandAttr(*MINewA, IntersectedFlags);
984 setSpecialOperandAttr(*MINewB, IntersectedFlags);
985 setSpecialOperandAttr(*MINewD, IntersectedFlags);
986 setSpecialOperandAttr(*MINewC, IntersectedFlags);
987
988 // Record new instructions for insertion.
989 InsInstrs.push_back(MINewA);
990 InsInstrs.push_back(MINewB);
991 InsInstrs.push_back(MINewD);
992 InsInstrs.push_back(MINewC);
993 break;
994 }
997 Register VarReg;
998 bool KillVarReg = false;
1000 VarReg = RegM31;
1001 KillVarReg = KillM31;
1002 } else {
1003 VarReg = RegM32;
1004 KillVarReg = KillM32;
1005 }
1006 // We don't want to get negative const from memory pool too early, as the
1007 // created entry will not be deleted even if it has no users. Since all
1008 // operand of Leaf and Root are virtual register, we use zero register
1009 // here as a placeholder. When the InsInstrs is selected in
1010 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1011 // with a virtual register which is a load from constant pool.
1012 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1013 .addReg(RegB, getKillRegState(RegB))
1014 .addReg(RegY, getKillRegState(KillY))
1015 .addReg(PPC::ZERO8);
1016 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1017 .addReg(NewVRA, getKillRegState(true))
1018 .addReg(RegX, getKillRegState(KillX))
1019 .addReg(VarReg, getKillRegState(KillVarReg));
1020 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1021 // both at index 1, no need to adjust.
1022 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1023 // the operand index here.
1024 break;
1025 }
1026 }
1027
1028 if (!IsILPReassociate) {
1029 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1030 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1031
1032 InsInstrs.push_back(NewARegPressure);
1033 InsInstrs.push_back(NewCRegPressure);
1034 }
1035
1036 assert(!InsInstrs.empty() &&
1037 "Insertion instructions set should not be empty!");
1038
1039 // Record old instructions for deletion.
1040 DelInstrs.push_back(Leaf);
1041 if (IsILPReassociate)
1042 DelInstrs.push_back(Prev);
1043 DelInstrs.push_back(&Root);
1044}
1045
1046// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
1048 Register &SrcReg, Register &DstReg,
1049 unsigned &SubIdx) const {
1050 switch (MI.getOpcode()) {
1051 default: return false;
1052 case PPC::EXTSW:
1053 case PPC::EXTSW_32:
1054 case PPC::EXTSW_32_64:
1055 SrcReg = MI.getOperand(1).getReg();
1056 DstReg = MI.getOperand(0).getReg();
1057 SubIdx = PPC::sub_32;
1058 return true;
1059 }
1060}
1061
1063 int &FrameIndex) const {
1064 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1065 // Check for the operands added by addFrameReference (the immediate is the
1066 // offset which defaults to 0).
1067 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1068 MI.getOperand(2).isFI()) {
1069 FrameIndex = MI.getOperand(2).getIndex();
1070 return MI.getOperand(0).getReg();
1071 }
1072 }
1073 return 0;
1074}
1075
1076// For opcodes with the ReMaterializable flag set, this function is called to
1077// verify the instruction is really rematable.
1079 const MachineInstr &MI) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 // Let base implementaion decide.
1083 break;
1084 case PPC::LI:
1085 case PPC::LI8:
1086 case PPC::PLI:
1087 case PPC::PLI8:
1088 case PPC::LIS:
1089 case PPC::LIS8:
1090 case PPC::ADDIStocHA:
1091 case PPC::ADDIStocHA8:
1092 case PPC::ADDItocL:
1093 case PPC::ADDItocL8:
1094 case PPC::LOAD_STACK_GUARD:
1095 case PPC::PPCLdFixedAddr:
1096 case PPC::XXLXORz:
1097 case PPC::XXLXORspz:
1098 case PPC::XXLXORdpz:
1099 case PPC::XXLEQVOnes:
1100 case PPC::XXSPLTI32DX:
1101 case PPC::XXSPLTIW:
1102 case PPC::XXSPLTIDP:
1103 case PPC::V_SET0B:
1104 case PPC::V_SET0H:
1105 case PPC::V_SET0:
1106 case PPC::V_SETALLONESB:
1107 case PPC::V_SETALLONESH:
1108 case PPC::V_SETALLONES:
1109 case PPC::CRSET:
1110 case PPC::CRUNSET:
1111 case PPC::XXSETACCZ:
1112 case PPC::DMXXSETACCZ:
1113 return true;
1114 }
1116}
1117
1119 int &FrameIndex) const {
1120 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
1121 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1122 MI.getOperand(2).isFI()) {
1123 FrameIndex = MI.getOperand(2).getIndex();
1124 return MI.getOperand(0).getReg();
1125 }
1126 }
1127 return 0;
1128}
1129
1131 unsigned OpIdx1,
1132 unsigned OpIdx2) const {
1133 MachineFunction &MF = *MI.getParent()->getParent();
1134
1135 // Normal instructions can be commuted the obvious way.
1136 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1137 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1138 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1139 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1140 // changing the relative order of the mask operands might change what happens
1141 // to the high-bits of the mask (and, thus, the result).
1142
1143 // Cannot commute if it has a non-zero rotate count.
1144 if (MI.getOperand(3).getImm() != 0)
1145 return nullptr;
1146
1147 // If we have a zero rotate count, we have:
1148 // M = mask(MB,ME)
1149 // Op0 = (Op1 & ~M) | (Op2 & M)
1150 // Change this to:
1151 // M = mask((ME+1)&31, (MB-1)&31)
1152 // Op0 = (Op2 & ~M) | (Op1 & M)
1153
1154 // Swap op1/op2
1155 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1156 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1157 Register Reg0 = MI.getOperand(0).getReg();
1158 Register Reg1 = MI.getOperand(1).getReg();
1159 Register Reg2 = MI.getOperand(2).getReg();
1160 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1161 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1162 bool Reg1IsKill = MI.getOperand(1).isKill();
1163 bool Reg2IsKill = MI.getOperand(2).isKill();
1164 bool ChangeReg0 = false;
1165 // If machine instrs are no longer in two-address forms, update
1166 // destination register as well.
1167 if (Reg0 == Reg1) {
1168 // Must be two address instruction (i.e. op1 is tied to op0).
1169 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1170 "Expecting a two-address instruction!");
1171 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
1172 Reg2IsKill = false;
1173 ChangeReg0 = true;
1174 }
1175
1176 // Masks.
1177 unsigned MB = MI.getOperand(4).getImm();
1178 unsigned ME = MI.getOperand(5).getImm();
1179
1180 // We can't commute a trivial mask (there is no way to represent an all-zero
1181 // mask).
1182 if (MB == 0 && ME == 31)
1183 return nullptr;
1184
1185 if (NewMI) {
1186 // Create a new instruction.
1187 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1188 bool Reg0IsDead = MI.getOperand(0).isDead();
1189 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1190 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1191 .addReg(Reg2, getKillRegState(Reg2IsKill))
1192 .addReg(Reg1, getKillRegState(Reg1IsKill))
1193 .addImm((ME + 1) & 31)
1194 .addImm((MB - 1) & 31);
1195 }
1196
1197 if (ChangeReg0) {
1198 MI.getOperand(0).setReg(Reg2);
1199 MI.getOperand(0).setSubReg(SubReg2);
1200 }
1201 MI.getOperand(2).setReg(Reg1);
1202 MI.getOperand(1).setReg(Reg2);
1203 MI.getOperand(2).setSubReg(SubReg1);
1204 MI.getOperand(1).setSubReg(SubReg2);
1205 MI.getOperand(2).setIsKill(Reg1IsKill);
1206 MI.getOperand(1).setIsKill(Reg2IsKill);
1207
1208 // Swap the mask around.
1209 MI.getOperand(4).setImm((ME + 1) & 31);
1210 MI.getOperand(5).setImm((MB - 1) & 31);
1211 return &MI;
1212}
1213
1215 unsigned &SrcOpIdx1,
1216 unsigned &SrcOpIdx2) const {
1217 // For VSX A-Type FMA instructions, it is the first two operands that can be
1218 // commuted, however, because the non-encoded tied input operand is listed
1219 // first, the operands to swap are actually the second and third.
1220
1221 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1222 if (AltOpc == -1)
1223 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1224
1225 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1226 // and SrcOpIdx2.
1227 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1228}
1229
1232 // This function is used for scheduling, and the nop wanted here is the type
1233 // that terminates dispatch groups on the POWER cores.
1234 unsigned Directive = Subtarget.getCPUDirective();
1235 unsigned Opcode;
1236 switch (Directive) {
1237 default: Opcode = PPC::NOP; break;
1238 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1239 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1240 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1241 // FIXME: Update when POWER9 scheduling model is ready.
1242 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1243 }
1244
1245 DebugLoc DL;
1246 BuildMI(MBB, MI, DL, get(Opcode));
1247}
1248
1249/// Return the noop instruction to use for a noop.
1251 MCInst Nop;
1252 Nop.setOpcode(PPC::NOP);
1253 return Nop;
1254}
1255
1256// Branch analysis.
1257// Note: If the condition register is set to CTR or CTR8 then this is a
1258// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1261 MachineBasicBlock *&FBB,
1263 bool AllowModify) const {
1264 bool isPPC64 = Subtarget.isPPC64();
1265
1266 // If the block has no terminators, it just falls into the block after it.
1268 if (I == MBB.end())
1269 return false;
1270
1271 if (!isUnpredicatedTerminator(*I))
1272 return false;
1273
1274 if (AllowModify) {
1275 // If the BB ends with an unconditional branch to the fallthrough BB,
1276 // we eliminate the branch instruction.
1277 if (I->getOpcode() == PPC::B &&
1278 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1279 I->eraseFromParent();
1280
1281 // We update iterator after deleting the last branch.
1283 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1284 return false;
1285 }
1286 }
1287
1288 // Get the last instruction in the block.
1289 MachineInstr &LastInst = *I;
1290
1291 // If there is only one terminator instruction, process it.
1292 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1293 if (LastInst.getOpcode() == PPC::B) {
1294 if (!LastInst.getOperand(0).isMBB())
1295 return true;
1296 TBB = LastInst.getOperand(0).getMBB();
1297 return false;
1298 } else if (LastInst.getOpcode() == PPC::BCC) {
1299 if (!LastInst.getOperand(2).isMBB())
1300 return true;
1301 // Block ends with fall-through condbranch.
1302 TBB = LastInst.getOperand(2).getMBB();
1303 Cond.push_back(LastInst.getOperand(0));
1304 Cond.push_back(LastInst.getOperand(1));
1305 return false;
1306 } else if (LastInst.getOpcode() == PPC::BC) {
1307 if (!LastInst.getOperand(1).isMBB())
1308 return true;
1309 // Block ends with fall-through condbranch.
1310 TBB = LastInst.getOperand(1).getMBB();
1312 Cond.push_back(LastInst.getOperand(0));
1313 return false;
1314 } else if (LastInst.getOpcode() == PPC::BCn) {
1315 if (!LastInst.getOperand(1).isMBB())
1316 return true;
1317 // Block ends with fall-through condbranch.
1318 TBB = LastInst.getOperand(1).getMBB();
1320 Cond.push_back(LastInst.getOperand(0));
1321 return false;
1322 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1323 LastInst.getOpcode() == PPC::BDNZ) {
1324 if (!LastInst.getOperand(0).isMBB())
1325 return true;
1327 return true;
1328 TBB = LastInst.getOperand(0).getMBB();
1329 Cond.push_back(MachineOperand::CreateImm(1));
1330 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1331 true));
1332 return false;
1333 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1334 LastInst.getOpcode() == PPC::BDZ) {
1335 if (!LastInst.getOperand(0).isMBB())
1336 return true;
1338 return true;
1339 TBB = LastInst.getOperand(0).getMBB();
1340 Cond.push_back(MachineOperand::CreateImm(0));
1341 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1342 true));
1343 return false;
1344 }
1345
1346 // Otherwise, don't know what this is.
1347 return true;
1348 }
1349
1350 // Get the instruction before it if it's a terminator.
1351 MachineInstr &SecondLastInst = *I;
1352
1353 // If there are three terminators, we don't know what sort of block this is.
1354 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1355 return true;
1356
1357 // If the block ends with PPC::B and PPC:BCC, handle it.
1358 if (SecondLastInst.getOpcode() == PPC::BCC &&
1359 LastInst.getOpcode() == PPC::B) {
1360 if (!SecondLastInst.getOperand(2).isMBB() ||
1361 !LastInst.getOperand(0).isMBB())
1362 return true;
1363 TBB = SecondLastInst.getOperand(2).getMBB();
1364 Cond.push_back(SecondLastInst.getOperand(0));
1365 Cond.push_back(SecondLastInst.getOperand(1));
1366 FBB = LastInst.getOperand(0).getMBB();
1367 return false;
1368 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1369 LastInst.getOpcode() == PPC::B) {
1370 if (!SecondLastInst.getOperand(1).isMBB() ||
1371 !LastInst.getOperand(0).isMBB())
1372 return true;
1373 TBB = SecondLastInst.getOperand(1).getMBB();
1375 Cond.push_back(SecondLastInst.getOperand(0));
1376 FBB = LastInst.getOperand(0).getMBB();
1377 return false;
1378 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1379 LastInst.getOpcode() == PPC::B) {
1380 if (!SecondLastInst.getOperand(1).isMBB() ||
1381 !LastInst.getOperand(0).isMBB())
1382 return true;
1383 TBB = SecondLastInst.getOperand(1).getMBB();
1385 Cond.push_back(SecondLastInst.getOperand(0));
1386 FBB = LastInst.getOperand(0).getMBB();
1387 return false;
1388 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1389 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1390 LastInst.getOpcode() == PPC::B) {
1391 if (!SecondLastInst.getOperand(0).isMBB() ||
1392 !LastInst.getOperand(0).isMBB())
1393 return true;
1395 return true;
1396 TBB = SecondLastInst.getOperand(0).getMBB();
1397 Cond.push_back(MachineOperand::CreateImm(1));
1398 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1399 true));
1400 FBB = LastInst.getOperand(0).getMBB();
1401 return false;
1402 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1403 SecondLastInst.getOpcode() == PPC::BDZ) &&
1404 LastInst.getOpcode() == PPC::B) {
1405 if (!SecondLastInst.getOperand(0).isMBB() ||
1406 !LastInst.getOperand(0).isMBB())
1407 return true;
1409 return true;
1410 TBB = SecondLastInst.getOperand(0).getMBB();
1411 Cond.push_back(MachineOperand::CreateImm(0));
1412 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1413 true));
1414 FBB = LastInst.getOperand(0).getMBB();
1415 return false;
1416 }
1417
1418 // If the block ends with two PPC:Bs, handle it. The second one is not
1419 // executed, so remove it.
1420 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1421 if (!SecondLastInst.getOperand(0).isMBB())
1422 return true;
1423 TBB = SecondLastInst.getOperand(0).getMBB();
1424 I = LastInst;
1425 if (AllowModify)
1426 I->eraseFromParent();
1427 return false;
1428 }
1429
1430 // Otherwise, can't handle this.
1431 return true;
1432}
1433
1435 int *BytesRemoved) const {
1436 assert(!BytesRemoved && "code size not handled");
1437
1439 if (I == MBB.end())
1440 return 0;
1441
1442 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1443 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1444 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1445 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1446 return 0;
1447
1448 // Remove the branch.
1449 I->eraseFromParent();
1450
1451 I = MBB.end();
1452
1453 if (I == MBB.begin()) return 1;
1454 --I;
1455 if (I->getOpcode() != PPC::BCC &&
1456 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1457 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1458 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1459 return 1;
1460
1461 // Remove the branch.
1462 I->eraseFromParent();
1463 return 2;
1464}
1465
1468 MachineBasicBlock *FBB,
1470 const DebugLoc &DL,
1471 int *BytesAdded) const {
1472 // Shouldn't be a fall through.
1473 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1474 assert((Cond.size() == 2 || Cond.size() == 0) &&
1475 "PPC branch conditions have two components!");
1476 assert(!BytesAdded && "code size not handled");
1477
1478 bool isPPC64 = Subtarget.isPPC64();
1479
1480 // One-way branch.
1481 if (!FBB) {
1482 if (Cond.empty()) // Unconditional branch
1483 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1484 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1485 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1486 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1487 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1488 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1489 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1490 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1491 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1492 else // Conditional branch
1493 BuildMI(&MBB, DL, get(PPC::BCC))
1494 .addImm(Cond[0].getImm())
1495 .add(Cond[1])
1496 .addMBB(TBB);
1497 return 1;
1498 }
1499
1500 // Two-way Conditional Branch.
1501 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1502 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1503 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1504 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1505 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1506 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1507 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1508 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1509 else
1510 BuildMI(&MBB, DL, get(PPC::BCC))
1511 .addImm(Cond[0].getImm())
1512 .add(Cond[1])
1513 .addMBB(TBB);
1514 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1515 return 2;
1516}
1517
1518// Select analysis.
1521 Register DstReg, Register TrueReg,
1522 Register FalseReg, int &CondCycles,
1523 int &TrueCycles, int &FalseCycles) const {
1524 if (!Subtarget.hasISEL())
1525 return false;
1526
1527 if (Cond.size() != 2)
1528 return false;
1529
1530 // If this is really a bdnz-like condition, then it cannot be turned into a
1531 // select.
1532 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1533 return false;
1534
1535 // If the conditional branch uses a physical register, then it cannot be
1536 // turned into a select.
1537 if (Cond[1].getReg().isPhysical())
1538 return false;
1539
1540 // Check register classes.
1542 const TargetRegisterClass *RC =
1543 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1544 if (!RC)
1545 return false;
1546
1547 // isel is for regular integer GPRs only.
1548 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1549 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1550 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1552 return false;
1553
1554 // FIXME: These numbers are for the A2, how well they work for other cores is
1555 // an open question. On the A2, the isel instruction has a 2-cycle latency
1556 // but single-cycle throughput. These numbers are used in combination with
1557 // the MispredictPenalty setting from the active SchedMachineModel.
1558 CondCycles = 1;
1559 TrueCycles = 1;
1560 FalseCycles = 1;
1561
1562 return true;
1563}
1564
1567 const DebugLoc &dl, Register DestReg,
1569 Register FalseReg) const {
1570 assert(Cond.size() == 2 &&
1571 "PPC branch conditions have two components!");
1572
1573 // Get the register classes.
1575 const TargetRegisterClass *RC =
1576 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1577 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1578
1579 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1580 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1581 assert((Is64Bit ||
1582 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1583 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1584 "isel is for regular integer GPRs only");
1585
1586 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1587 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1588
1589 unsigned SubIdx = 0;
1590 bool SwapOps = false;
1591 switch (SelectPred) {
1592 case PPC::PRED_EQ:
1593 case PPC::PRED_EQ_MINUS:
1594 case PPC::PRED_EQ_PLUS:
1595 SubIdx = PPC::sub_eq; SwapOps = false; break;
1596 case PPC::PRED_NE:
1597 case PPC::PRED_NE_MINUS:
1598 case PPC::PRED_NE_PLUS:
1599 SubIdx = PPC::sub_eq; SwapOps = true; break;
1600 case PPC::PRED_LT:
1601 case PPC::PRED_LT_MINUS:
1602 case PPC::PRED_LT_PLUS:
1603 SubIdx = PPC::sub_lt; SwapOps = false; break;
1604 case PPC::PRED_GE:
1605 case PPC::PRED_GE_MINUS:
1606 case PPC::PRED_GE_PLUS:
1607 SubIdx = PPC::sub_lt; SwapOps = true; break;
1608 case PPC::PRED_GT:
1609 case PPC::PRED_GT_MINUS:
1610 case PPC::PRED_GT_PLUS:
1611 SubIdx = PPC::sub_gt; SwapOps = false; break;
1612 case PPC::PRED_LE:
1613 case PPC::PRED_LE_MINUS:
1614 case PPC::PRED_LE_PLUS:
1615 SubIdx = PPC::sub_gt; SwapOps = true; break;
1616 case PPC::PRED_UN:
1617 case PPC::PRED_UN_MINUS:
1618 case PPC::PRED_UN_PLUS:
1619 SubIdx = PPC::sub_un; SwapOps = false; break;
1620 case PPC::PRED_NU:
1621 case PPC::PRED_NU_MINUS:
1622 case PPC::PRED_NU_PLUS:
1623 SubIdx = PPC::sub_un; SwapOps = true; break;
1624 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1625 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1626 }
1627
1628 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1629 SecondReg = SwapOps ? TrueReg : FalseReg;
1630
1631 // The first input register of isel cannot be r0. If it is a member
1632 // of a register class that can be r0, then copy it first (the
1633 // register allocator should eliminate the copy).
1634 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1635 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1636 const TargetRegisterClass *FirstRC =
1637 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1638 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1639 Register OldFirstReg = FirstReg;
1640 FirstReg = MRI.createVirtualRegister(FirstRC);
1641 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1642 .addReg(OldFirstReg);
1643 }
1644
1645 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1646 .addReg(FirstReg).addReg(SecondReg)
1647 .addReg(Cond[1].getReg(), 0, SubIdx);
1648}
1649
1650static unsigned getCRBitValue(unsigned CRBit) {
1651 unsigned Ret = 4;
1652 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1653 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1654 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1655 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1656 Ret = 3;
1657 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1658 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1659 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1660 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1661 Ret = 2;
1662 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1663 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1664 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1665 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1666 Ret = 1;
1667 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1668 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1669 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1670 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1671 Ret = 0;
1672
1673 assert(Ret != 4 && "Invalid CR bit register");
1674 return Ret;
1675}
1676
1679 const DebugLoc &DL, Register DestReg,
1680 Register SrcReg, bool KillSrc,
1681 bool RenamableDest, bool RenamableSrc) const {
1682 // We can end up with self copies and similar things as a result of VSX copy
1683 // legalization. Promote them here.
1685 if (PPC::F8RCRegClass.contains(DestReg) &&
1686 PPC::VSRCRegClass.contains(SrcReg)) {
1687 MCRegister SuperReg =
1688 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1689
1690 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1691 llvm_unreachable("nop VSX copy");
1692
1693 DestReg = SuperReg;
1694 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1695 PPC::VSRCRegClass.contains(DestReg)) {
1696 MCRegister SuperReg =
1697 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1698
1699 if (VSXSelfCopyCrash && DestReg == SuperReg)
1700 llvm_unreachable("nop VSX copy");
1701
1702 SrcReg = SuperReg;
1703 }
1704
1705 // Different class register copy
1706 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1707 PPC::GPRCRegClass.contains(DestReg)) {
1708 MCRegister CRReg = getCRFromCRBit(SrcReg);
1709 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1710 getKillRegState(KillSrc);
1711 // Rotate the CR bit in the CR fields to be the least significant bit and
1712 // then mask with 0x1 (MB = ME = 31).
1713 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1714 .addReg(DestReg, RegState::Kill)
1715 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1716 .addImm(31)
1717 .addImm(31);
1718 return;
1719 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1720 (PPC::G8RCRegClass.contains(DestReg) ||
1721 PPC::GPRCRegClass.contains(DestReg))) {
1722 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1723 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1724 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1725 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1726 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1727 getKillRegState(KillSrc);
1728 if (CRNum == 7)
1729 return;
1730 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1731 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1732 .addReg(DestReg, RegState::Kill)
1733 .addImm(CRNum * 4 + 4)
1734 .addImm(28)
1735 .addImm(31);
1736 return;
1737 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1738 PPC::VSFRCRegClass.contains(DestReg)) {
1739 assert(Subtarget.hasDirectMove() &&
1740 "Subtarget doesn't support directmove, don't know how to copy.");
1741 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1742 NumGPRtoVSRSpill++;
1743 getKillRegState(KillSrc);
1744 return;
1745 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1746 PPC::G8RCRegClass.contains(DestReg)) {
1747 assert(Subtarget.hasDirectMove() &&
1748 "Subtarget doesn't support directmove, don't know how to copy.");
1749 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1750 getKillRegState(KillSrc);
1751 return;
1752 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1753 PPC::GPRCRegClass.contains(DestReg)) {
1754 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1755 getKillRegState(KillSrc);
1756 return;
1757 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1758 PPC::SPERCRegClass.contains(DestReg)) {
1759 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1760 getKillRegState(KillSrc);
1761 return;
1762 } else if ((PPC::G8RCRegClass.contains(DestReg) ||
1763 PPC::GPRCRegClass.contains(DestReg)) &&
1764 SrcReg == PPC::CARRY) {
1765 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1766 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MFSPR8 : PPC::MFSPR), DestReg)
1767 .addImm(1)
1768 .addReg(PPC::CARRY, RegState::Implicit);
1769 return;
1770 } else if ((PPC::G8RCRegClass.contains(SrcReg) ||
1771 PPC::GPRCRegClass.contains(SrcReg)) &&
1772 DestReg == PPC::CARRY) {
1773 bool Is64Bit = PPC::G8RCRegClass.contains(SrcReg);
1774 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MTSPR8 : PPC::MTSPR))
1775 .addImm(1)
1776 .addReg(SrcReg)
1777 .addReg(PPC::CARRY, RegState::ImplicitDefine);
1778 return;
1779 }
1780
1781 unsigned Opc;
1782 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1783 Opc = PPC::OR;
1784 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1785 Opc = PPC::OR8;
1786 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1787 Opc = PPC::FMR;
1788 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1789 Opc = PPC::MCRF;
1790 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1791 Opc = PPC::VOR;
1792 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1793 // There are two different ways this can be done:
1794 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1795 // issue in VSU pipeline 0.
1796 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1797 // can go to either pipeline.
1798 // We'll always use xxlor here, because in practically all cases where
1799 // copies are generated, they are close enough to some use that the
1800 // lower-latency form is preferable.
1801 Opc = PPC::XXLOR;
1802 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1803 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1804 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1805 else if (Subtarget.pairedVectorMemops() &&
1806 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
1807 if (SrcReg > PPC::VSRp15)
1808 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1809 else
1810 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1811 if (DestReg > PPC::VSRp15)
1812 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1813 else
1814 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1815 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1816 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1817 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1818 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1819 return;
1820 }
1821 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1822 Opc = PPC::CROR;
1823 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1824 Opc = PPC::EVOR;
1825 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1826 PPC::UACCRCRegClass.contains(DestReg)) &&
1827 (PPC::ACCRCRegClass.contains(SrcReg) ||
1828 PPC::UACCRCRegClass.contains(SrcReg))) {
1829 // If primed, de-prime the source register, copy the individual registers
1830 // and prime the destination if needed. The vector subregisters are
1831 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1832 // source is primed, we need to re-prime it after the copy as well.
1833 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1834 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1835 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1836 MCRegister VSLSrcReg =
1837 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1838 MCRegister VSLDestReg =
1839 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1840 if (SrcPrimed)
1841 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1842 for (unsigned Idx = 0; Idx < 4; Idx++)
1843 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1844 .addReg(VSLSrcReg + Idx)
1845 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1846 if (DestPrimed)
1847 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1848 if (SrcPrimed && !KillSrc)
1849 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1850 return;
1851 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1852 PPC::G8pRCRegClass.contains(SrcReg)) {
1853 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
1854 unsigned DestRegIdx = DestReg - PPC::G8p0;
1855 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1856 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1857 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1858 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1859 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1860 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1861 .addReg(SrcRegSub0)
1862 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1863 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1864 .addReg(SrcRegSub1)
1865 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1866 return;
1867 } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
1868 PPC::WACC_HIRCRegClass.contains(DestReg)) &&
1869 (PPC::WACCRCRegClass.contains(SrcReg) ||
1870 PPC::WACC_HIRCRegClass.contains(SrcReg))) {
1871
1872 Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
1873 : PPC::DMXXEXTFDMR512_HI;
1874
1875 RegScavenger RS;
1877 RS.backward(std::next(I));
1878
1879 Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1880 /* RestoreAfter */ false, 0,
1881 /* AllowSpill */ false);
1882
1883 RS.setRegUsed(TmpReg1);
1884 Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1885 /* RestoreAfter */ false, 0,
1886 /* AllowSpill */ false);
1887
1888 BuildMI(MBB, I, DL, get(Opc))
1889 .addReg(TmpReg1, RegState::Define)
1890 .addReg(TmpReg2, RegState::Define)
1891 .addReg(SrcReg, getKillRegState(KillSrc));
1892
1893 Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
1894 : PPC::DMXXINSTDMR512_HI;
1895
1896 BuildMI(MBB, I, DL, get(Opc), DestReg)
1897 .addReg(TmpReg1, RegState::Kill)
1898 .addReg(TmpReg2, RegState::Kill);
1899
1900 return;
1901 } else if (PPC::DMRRCRegClass.contains(DestReg) &&
1902 PPC::DMRRCRegClass.contains(SrcReg)) {
1903
1904 BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
1905 .addReg(SrcReg, getKillRegState(KillSrc));
1906
1907 return;
1908
1909 } else
1910 llvm_unreachable("Impossible reg-to-reg copy");
1911
1912 const MCInstrDesc &MCID = get(Opc);
1913 if (MCID.getNumOperands() == 3)
1914 BuildMI(MBB, I, DL, MCID, DestReg)
1915 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1916 else
1917 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1918}
1919
1920unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1921 int OpcodeIndex = 0;
1922
1923 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1924 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1926 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1927 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1929 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1931 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1933 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1935 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1937 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1939 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1941 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1943 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1945 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1947 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1949 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1950 assert(Subtarget.pairedVectorMemops() &&
1951 "Register unexpected when paired memops are disabled.");
1953 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1954 assert(Subtarget.pairedVectorMemops() &&
1955 "Register unexpected when paired memops are disabled.");
1957 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1958 assert(Subtarget.pairedVectorMemops() &&
1959 "Register unexpected when paired memops are disabled.");
1961 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1962 assert(Subtarget.pairedVectorMemops() &&
1963 "Register unexpected when paired memops are disabled.");
1965 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1967 } else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
1968 llvm_unreachable("TODO: Implement spill DMRROW regclass!");
1969 } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
1970 llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
1971 } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
1973 } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
1975 } else {
1976 llvm_unreachable("Unknown regclass!");
1977 }
1978 return OpcodeIndex;
1979}
1980
1981unsigned
1983 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1984 return OpcodesForSpill[getSpillIndex(RC)];
1985}
1986
1987unsigned
1989 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1990 return OpcodesForSpill[getSpillIndex(RC)];
1991}
1992
1993void PPCInstrInfo::StoreRegToStackSlot(
1994 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1995 const TargetRegisterClass *RC,
1996 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1997 unsigned Opcode = getStoreOpcodeForSpill(RC);
1998 DebugLoc DL;
1999
2000 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2001 FuncInfo->setHasSpills();
2002
2004 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
2005 FrameIdx));
2006
2007 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
2008 PPC::CRBITRCRegClass.hasSubClassEq(RC))
2009 FuncInfo->setSpillsCR();
2010
2011 if (isXFormMemOp(Opcode))
2012 FuncInfo->setHasNonRISpills();
2013}
2014
2017 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
2018 const TargetRegisterInfo *TRI) const {
2019 MachineFunction &MF = *MBB.getParent();
2021
2022 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
2023
2024 for (MachineInstr *NewMI : NewMIs)
2025 MBB.insert(MI, NewMI);
2026
2027 const MachineFrameInfo &MFI = MF.getFrameInfo();
2031 MFI.getObjectAlign(FrameIdx));
2032 NewMIs.back()->addMemOperand(MF, MMO);
2033}
2034
2037 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
2038 const TargetRegisterInfo *TRI, Register VReg,
2039 MachineInstr::MIFlag Flags) const {
2040 // We need to avoid a situation in which the value from a VRRC register is
2041 // spilled using an Altivec instruction and reloaded into a VSRC register
2042 // using a VSX instruction. The issue with this is that the VSX
2043 // load/store instructions swap the doublewords in the vector and the Altivec
2044 // ones don't. The register classes on the spill/reload may be different if
2045 // the register is defined using an Altivec instruction and is then used by a
2046 // VSX instruction.
2047 RC = updatedRC(RC);
2048 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
2049}
2050
2051void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
2052 unsigned DestReg, int FrameIdx,
2053 const TargetRegisterClass *RC,
2055 const {
2056 unsigned Opcode = getLoadOpcodeForSpill(RC);
2057 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
2058 FrameIdx));
2059}
2060
2063 int FrameIdx, const TargetRegisterClass *RC,
2064 const TargetRegisterInfo *TRI) const {
2065 MachineFunction &MF = *MBB.getParent();
2067 DebugLoc DL;
2068 if (MI != MBB.end()) DL = MI->getDebugLoc();
2069
2070 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2071
2072 for (MachineInstr *NewMI : NewMIs)
2073 MBB.insert(MI, NewMI);
2074
2075 const MachineFrameInfo &MFI = MF.getFrameInfo();
2079 MFI.getObjectAlign(FrameIdx));
2080 NewMIs.back()->addMemOperand(MF, MMO);
2081}
2082
2085 int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
2086 Register VReg, MachineInstr::MIFlag Flags) const {
2087 // We need to avoid a situation in which the value from a VRRC register is
2088 // spilled using an Altivec instruction and reloaded into a VSRC register
2089 // using a VSX instruction. The issue with this is that the VSX
2090 // load/store instructions swap the doublewords in the vector and the Altivec
2091 // ones don't. The register classes on the spill/reload may be different if
2092 // the register is defined using an Altivec instruction and is then used by a
2093 // VSX instruction.
2094 RC = updatedRC(RC);
2095
2096 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2097}
2098
2101 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2102 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2103 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2104 else
2105 // Leave the CR# the same, but invert the condition.
2107 return false;
2108}
2109
2110// For some instructions, it is legal to fold ZERO into the RA register field.
2111// This function performs that fold by replacing the operand with PPC::ZERO,
2112// it does not consider whether the load immediate zero is no longer in use.
2114 Register Reg) const {
2115 // A zero immediate should always be loaded with a single li.
2116 unsigned DefOpc = DefMI.getOpcode();
2117 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2118 return false;
2119 if (!DefMI.getOperand(1).isImm())
2120 return false;
2121 if (DefMI.getOperand(1).getImm() != 0)
2122 return false;
2123
2124 // Note that we cannot here invert the arguments of an isel in order to fold
2125 // a ZERO into what is presented as the second argument. All we have here
2126 // is the condition bit, and that might come from a CR-logical bit operation.
2127
2128 const MCInstrDesc &UseMCID = UseMI.getDesc();
2129
2130 // Only fold into real machine instructions.
2131 if (UseMCID.isPseudo())
2132 return false;
2133
2134 // We need to find which of the User's operands is to be folded, that will be
2135 // the operand that matches the given register ID.
2136 unsigned UseIdx;
2137 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2138 if (UseMI.getOperand(UseIdx).isReg() &&
2139 UseMI.getOperand(UseIdx).getReg() == Reg)
2140 break;
2141
2142 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2143 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2144
2145 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2146
2147 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2148 // register (which might also be specified as a pointer class kind).
2149 if (UseInfo->isLookupPtrRegClass()) {
2150 if (UseInfo->RegClass /* Kind */ != 1)
2151 return false;
2152 } else {
2153 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2154 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2155 return false;
2156 }
2157
2158 // Make sure this is not tied to an output register (or otherwise
2159 // constrained). This is true for ST?UX registers, for example, which
2160 // are tied to their output registers.
2161 if (UseInfo->Constraints != 0)
2162 return false;
2163
2164 MCRegister ZeroReg;
2165 if (UseInfo->isLookupPtrRegClass()) {
2166 bool isPPC64 = Subtarget.isPPC64();
2167 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2168 } else {
2169 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2170 PPC::ZERO8 : PPC::ZERO;
2171 }
2172
2173 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2174 LLVM_DEBUG(UseMI.dump());
2175 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2176 LLVM_DEBUG(dbgs() << "Into: ");
2177 LLVM_DEBUG(UseMI.dump());
2178 return true;
2179}
2180
2181// Folds zero into instructions which have a load immediate zero as an operand
2182// but also recognize zero as immediate zero. If the definition of the load
2183// has no more users it is deleted.
2185 Register Reg, MachineRegisterInfo *MRI) const {
2186 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2187 if (MRI->use_nodbg_empty(Reg))
2188 DefMI.eraseFromParent();
2189 return Changed;
2190}
2191
2193 for (MachineInstr &MI : MBB)
2194 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2195 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2196 return true;
2197 return false;
2198}
2199
2200// We should make sure that, if we're going to predicate both sides of a
2201// condition (a diamond), that both sides don't define the counter register. We
2202// can predicate counter-decrement-based branches, but while that predicates
2203// the branching, it does not predicate the counter decrement. If we tried to
2204// merge the triangle into one predicated block, we'd decrement the counter
2205// twice.
2207 unsigned NumT, unsigned ExtraT,
2208 MachineBasicBlock &FMBB,
2209 unsigned NumF, unsigned ExtraF,
2210 BranchProbability Probability) const {
2211 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2212}
2213
2214
2216 // The predicated branches are identified by their type, not really by the
2217 // explicit presence of a predicate. Furthermore, some of them can be
2218 // predicated more than once. Because if conversion won't try to predicate
2219 // any instruction which already claims to be predicated (by returning true
2220 // here), always return false. In doing so, we let isPredicable() be the
2221 // final word on whether not the instruction can be (further) predicated.
2222
2223 return false;
2224}
2225
2227 const MachineBasicBlock *MBB,
2228 const MachineFunction &MF) const {
2229 switch (MI.getOpcode()) {
2230 default:
2231 break;
2232 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2233 // across them, since some FP operations may change content of FPSCR.
2234 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2235 case PPC::MFFS:
2236 case PPC::MTFSF:
2237 case PPC::FENCE:
2238 return true;
2239 }
2241}
2242
2244 ArrayRef<MachineOperand> Pred) const {
2245 unsigned OpC = MI.getOpcode();
2246 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2247 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2248 bool isPPC64 = Subtarget.isPPC64();
2249 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2250 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2251 // Need add Def and Use for CTR implicit operand.
2252 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2253 .addReg(Pred[1].getReg(), RegState::Implicit)
2255 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2256 MI.setDesc(get(PPC::BCLR));
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2258 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2259 MI.setDesc(get(PPC::BCLRn));
2260 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2261 } else {
2262 MI.setDesc(get(PPC::BCCLR));
2263 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2264 .addImm(Pred[0].getImm())
2265 .add(Pred[1]);
2266 }
2267
2268 return true;
2269 } else if (OpC == PPC::B) {
2270 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2271 bool isPPC64 = Subtarget.isPPC64();
2272 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2273 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2274 // Need add Def and Use for CTR implicit operand.
2275 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2276 .addReg(Pred[1].getReg(), RegState::Implicit)
2278 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2279 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2280 MI.removeOperand(0);
2281
2282 MI.setDesc(get(PPC::BC));
2283 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2284 .add(Pred[1])
2285 .addMBB(MBB);
2286 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2287 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2288 MI.removeOperand(0);
2289
2290 MI.setDesc(get(PPC::BCn));
2291 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2292 .add(Pred[1])
2293 .addMBB(MBB);
2294 } else {
2295 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2296 MI.removeOperand(0);
2297
2298 MI.setDesc(get(PPC::BCC));
2299 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2300 .addImm(Pred[0].getImm())
2301 .add(Pred[1])
2302 .addMBB(MBB);
2303 }
2304
2305 return true;
2306 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2307 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2308 OpC == PPC::BCTRL8_RM) {
2309 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2310 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2311
2312 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2313 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2314 bool isPPC64 = Subtarget.isPPC64();
2315
2316 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2317 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2318 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2319 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2320 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2321 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2322 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2323 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2324 } else {
2325 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2326 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2327 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2328 .addImm(Pred[0].getImm())
2329 .add(Pred[1]);
2330 }
2331
2332 // Need add Def and Use for LR implicit operand.
2333 if (setLR)
2334 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2335 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2336 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2337 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2338 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2340
2341 return true;
2342 }
2343
2344 return false;
2345}
2346
2348 ArrayRef<MachineOperand> Pred2) const {
2349 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2350 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2351
2352 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2353 return false;
2354 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2355 return false;
2356
2357 // P1 can only subsume P2 if they test the same condition register.
2358 if (Pred1[1].getReg() != Pred2[1].getReg())
2359 return false;
2360
2361 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2362 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2363
2364 if (P1 == P2)
2365 return true;
2366
2367 // Does P1 subsume P2, e.g. GE subsumes GT.
2368 if (P1 == PPC::PRED_LE &&
2369 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2370 return true;
2371 if (P1 == PPC::PRED_GE &&
2372 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2373 return true;
2374
2375 return false;
2376}
2377
2379 std::vector<MachineOperand> &Pred,
2380 bool SkipDead) const {
2381 // Note: At the present time, the contents of Pred from this function is
2382 // unused by IfConversion. This implementation follows ARM by pushing the
2383 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2384 // predicate, instructions defining CTR or CTR8 are also included as
2385 // predicate-defining instructions.
2386
2387 const TargetRegisterClass *RCs[] =
2388 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2389 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2390
2391 bool Found = false;
2392 for (const MachineOperand &MO : MI.operands()) {
2393 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2394 const TargetRegisterClass *RC = RCs[c];
2395 if (MO.isReg()) {
2396 if (MO.isDef() && RC->contains(MO.getReg())) {
2397 Pred.push_back(MO);
2398 Found = true;
2399 }
2400 } else if (MO.isRegMask()) {
2401 for (MCPhysReg R : *RC)
2402 if (MO.clobbersPhysReg(R)) {
2403 Pred.push_back(MO);
2404 Found = true;
2405 }
2406 }
2407 }
2408 }
2409
2410 return Found;
2411}
2412
2414 Register &SrcReg2, int64_t &Mask,
2415 int64_t &Value) const {
2416 unsigned Opc = MI.getOpcode();
2417
2418 switch (Opc) {
2419 default: return false;
2420 case PPC::CMPWI:
2421 case PPC::CMPLWI:
2422 case PPC::CMPDI:
2423 case PPC::CMPLDI:
2424 SrcReg = MI.getOperand(1).getReg();
2425 SrcReg2 = 0;
2426 Value = MI.getOperand(2).getImm();
2427 Mask = 0xFFFF;
2428 return true;
2429 case PPC::CMPW:
2430 case PPC::CMPLW:
2431 case PPC::CMPD:
2432 case PPC::CMPLD:
2433 case PPC::FCMPUS:
2434 case PPC::FCMPUD:
2435 SrcReg = MI.getOperand(1).getReg();
2436 SrcReg2 = MI.getOperand(2).getReg();
2437 Value = 0;
2438 Mask = 0;
2439 return true;
2440 }
2441}
2442
2444 Register SrcReg2, int64_t Mask,
2445 int64_t Value,
2446 const MachineRegisterInfo *MRI) const {
2447 if (DisableCmpOpt)
2448 return false;
2449
2450 int OpC = CmpInstr.getOpcode();
2451 Register CRReg = CmpInstr.getOperand(0).getReg();
2452
2453 // FP record forms set CR1 based on the exception status bits, not a
2454 // comparison with zero.
2455 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2456 return false;
2457
2459 // The record forms set the condition register based on a signed comparison
2460 // with zero (so says the ISA manual). This is not as straightforward as it
2461 // seems, however, because this is always a 64-bit comparison on PPC64, even
2462 // for instructions that are 32-bit in nature (like slw for example).
2463 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2464 // for equality checks (as those don't depend on the sign). On PPC64,
2465 // we are restricted to equality for unsigned 64-bit comparisons and for
2466 // signed 32-bit comparisons the applicability is more restricted.
2467 bool isPPC64 = Subtarget.isPPC64();
2468 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2469 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2470 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2471
2472 // Look through copies unless that gets us to a physical register.
2473 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2474 if (ActualSrc.isVirtual())
2475 SrcReg = ActualSrc;
2476
2477 // Get the unique definition of SrcReg.
2478 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2479 if (!MI) return false;
2480
2481 bool equalityOnly = false;
2482 bool noSub = false;
2483 if (isPPC64) {
2484 if (is32BitSignedCompare) {
2485 // We can perform this optimization only if SrcReg is sign-extending.
2486 if (isSignExtended(SrcReg, MRI))
2487 noSub = true;
2488 else
2489 return false;
2490 } else if (is32BitUnsignedCompare) {
2491 // We can perform this optimization, equality only, if SrcReg is
2492 // zero-extending.
2493 if (isZeroExtended(SrcReg, MRI)) {
2494 noSub = true;
2495 equalityOnly = true;
2496 } else
2497 return false;
2498 } else
2499 equalityOnly = is64BitUnsignedCompare;
2500 } else
2501 equalityOnly = is32BitUnsignedCompare;
2502
2503 if (equalityOnly) {
2504 // We need to check the uses of the condition register in order to reject
2505 // non-equality comparisons.
2507 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2508 I != IE; ++I) {
2509 MachineInstr *UseMI = &*I;
2510 if (UseMI->getOpcode() == PPC::BCC) {
2512 unsigned PredCond = PPC::getPredicateCondition(Pred);
2513 // We ignore hint bits when checking for non-equality comparisons.
2514 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2515 return false;
2516 } else if (UseMI->getOpcode() == PPC::ISEL ||
2517 UseMI->getOpcode() == PPC::ISEL8) {
2518 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2519 if (SubIdx != PPC::sub_eq)
2520 return false;
2521 } else
2522 return false;
2523 }
2524 }
2525
2526 MachineBasicBlock::iterator I = CmpInstr;
2527
2528 // Scan forward to find the first use of the compare.
2529 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2530 ++I) {
2531 bool FoundUse = false;
2533 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2534 J != JE; ++J)
2535 if (&*J == &*I) {
2536 FoundUse = true;
2537 break;
2538 }
2539
2540 if (FoundUse)
2541 break;
2542 }
2543
2546
2547 // There are two possible candidates which can be changed to set CR[01].
2548 // One is MI, the other is a SUB instruction.
2549 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2550 MachineInstr *Sub = nullptr;
2551 if (SrcReg2 != 0)
2552 // MI is not a candidate for CMPrr.
2553 MI = nullptr;
2554 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2555 // same BB as the comparison. This is to allow the check below to avoid calls
2556 // (and other explicit clobbers); instead we should really check for these
2557 // more explicitly (in at least a few predecessors).
2558 else if (MI->getParent() != CmpInstr.getParent())
2559 return false;
2560 else if (Value != 0) {
2561 // The record-form instructions set CR bit based on signed comparison
2562 // against 0. We try to convert a compare against 1 or -1 into a compare
2563 // against 0 to exploit record-form instructions. For example, we change
2564 // the condition "greater than -1" into "greater than or equal to 0"
2565 // and "less than 1" into "less than or equal to 0".
2566
2567 // Since we optimize comparison based on a specific branch condition,
2568 // we don't optimize if condition code is used by more than once.
2569 if (equalityOnly || !MRI->hasOneUse(CRReg))
2570 return false;
2571
2572 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2573 if (UseMI->getOpcode() != PPC::BCC)
2574 return false;
2575
2577 unsigned PredCond = PPC::getPredicateCondition(Pred);
2578 unsigned PredHint = PPC::getPredicateHint(Pred);
2579 int16_t Immed = (int16_t)Value;
2580
2581 // When modifying the condition in the predicate, we propagate hint bits
2582 // from the original predicate to the new one.
2583 if (Immed == -1 && PredCond == PPC::PRED_GT)
2584 // We convert "greater than -1" into "greater than or equal to 0",
2585 // since we are assuming signed comparison by !equalityOnly
2586 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2587 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2588 // We convert "less than or equal to -1" into "less than 0".
2589 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2590 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2591 // We convert "less than 1" into "less than or equal to 0".
2592 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2593 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2594 // We convert "greater than or equal to 1" into "greater than 0".
2595 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2596 else
2597 return false;
2598
2599 // Convert the comparison and its user to a compare against zero with the
2600 // appropriate predicate on the branch. Zero comparison might provide
2601 // optimization opportunities post-RA (see optimization in
2602 // PPCPreEmitPeephole.cpp).
2603 UseMI->getOperand(0).setImm(Pred);
2604 CmpInstr.getOperand(2).setImm(0);
2605 }
2606
2607 // Search for Sub.
2608 --I;
2609
2610 // Get ready to iterate backward from CmpInstr.
2611 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2612
2613 for (; I != E && !noSub; --I) {
2614 const MachineInstr &Instr = *I;
2615 unsigned IOpC = Instr.getOpcode();
2616
2617 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2618 Instr.readsRegister(PPC::CR0, TRI)))
2619 // This instruction modifies or uses the record condition register after
2620 // the one we want to change. While we could do this transformation, it
2621 // would likely not be profitable. This transformation removes one
2622 // instruction, and so even forcing RA to generate one move probably
2623 // makes it unprofitable.
2624 return false;
2625
2626 // Check whether CmpInstr can be made redundant by the current instruction.
2627 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2628 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2629 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2630 ((Instr.getOperand(1).getReg() == SrcReg &&
2631 Instr.getOperand(2).getReg() == SrcReg2) ||
2632 (Instr.getOperand(1).getReg() == SrcReg2 &&
2633 Instr.getOperand(2).getReg() == SrcReg))) {
2634 Sub = &*I;
2635 break;
2636 }
2637
2638 if (I == B)
2639 // The 'and' is below the comparison instruction.
2640 return false;
2641 }
2642
2643 // Return false if no candidates exist.
2644 if (!MI && !Sub)
2645 return false;
2646
2647 // The single candidate is called MI.
2648 if (!MI) MI = Sub;
2649
2650 int NewOpC = -1;
2651 int MIOpC = MI->getOpcode();
2652 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2653 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2654 NewOpC = MIOpC;
2655 else {
2656 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2657 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2658 NewOpC = MIOpC;
2659 }
2660
2661 // FIXME: On the non-embedded POWER architectures, only some of the record
2662 // forms are fast, and we should use only the fast ones.
2663
2664 // The defining instruction has a record form (or is already a record
2665 // form). It is possible, however, that we'll need to reverse the condition
2666 // code of the users.
2667 if (NewOpC == -1)
2668 return false;
2669
2670 // This transformation should not be performed if `nsw` is missing and is not
2671 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2672 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2673 // CRReg can reflect if compared values are equal, this optz is still valid.
2674 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2675 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2676 return false;
2677
2678 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2679 // needs to be updated to be based on SUB. Push the condition code
2680 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2681 // condition code of these operands will be modified.
2682 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2683 // comparison against 0, which may modify predicate.
2684 bool ShouldSwap = false;
2685 if (Sub && Value == 0) {
2686 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2687 Sub->getOperand(2).getReg() == SrcReg;
2688
2689 // The operands to subf are the opposite of sub, so only in the fixed-point
2690 // case, invert the order.
2691 ShouldSwap = !ShouldSwap;
2692 }
2693
2694 if (ShouldSwap)
2696 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2697 I != IE; ++I) {
2698 MachineInstr *UseMI = &*I;
2699 if (UseMI->getOpcode() == PPC::BCC) {
2701 unsigned PredCond = PPC::getPredicateCondition(Pred);
2702 assert((!equalityOnly ||
2703 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2704 "Invalid predicate for equality-only optimization");
2705 (void)PredCond; // To suppress warning in release build.
2706 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2708 } else if (UseMI->getOpcode() == PPC::ISEL ||
2709 UseMI->getOpcode() == PPC::ISEL8) {
2710 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2711 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2712 "Invalid CR bit for equality-only optimization");
2713
2714 if (NewSubReg == PPC::sub_lt)
2715 NewSubReg = PPC::sub_gt;
2716 else if (NewSubReg == PPC::sub_gt)
2717 NewSubReg = PPC::sub_lt;
2718
2719 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2720 NewSubReg));
2721 } else // We need to abort on a user we don't understand.
2722 return false;
2723 }
2724 assert(!(Value != 0 && ShouldSwap) &&
2725 "Non-zero immediate support and ShouldSwap"
2726 "may conflict in updating predicate");
2727
2728 // Create a new virtual register to hold the value of the CR set by the
2729 // record-form instruction. If the instruction was not previously in
2730 // record form, then set the kill flag on the CR.
2731 CmpInstr.eraseFromParent();
2732
2734 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2735 get(TargetOpcode::COPY), CRReg)
2736 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2737
2738 // Even if CR0 register were dead before, it is alive now since the
2739 // instruction we just built uses it.
2740 MI->clearRegisterDeads(PPC::CR0);
2741
2742 if (MIOpC != NewOpC) {
2743 // We need to be careful here: we're replacing one instruction with
2744 // another, and we need to make sure that we get all of the right
2745 // implicit uses and defs. On the other hand, the caller may be holding
2746 // an iterator to this instruction, and so we can't delete it (this is
2747 // specifically the case if this is the instruction directly after the
2748 // compare).
2749
2750 // Rotates are expensive instructions. If we're emitting a record-form
2751 // rotate that can just be an andi/andis, we should just emit that.
2752 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2753 Register GPRRes = MI->getOperand(0).getReg();
2754 int64_t SH = MI->getOperand(2).getImm();
2755 int64_t MB = MI->getOperand(3).getImm();
2756 int64_t ME = MI->getOperand(4).getImm();
2757 // We can only do this if both the start and end of the mask are in the
2758 // same halfword.
2759 bool MBInLoHWord = MB >= 16;
2760 bool MEInLoHWord = ME >= 16;
2761 uint64_t Mask = ~0LLU;
2762
2763 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2764 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2765 // The mask value needs to shift right 16 if we're emitting andis.
2766 Mask >>= MBInLoHWord ? 0 : 16;
2767 NewOpC = MIOpC == PPC::RLWINM
2768 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2769 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2770 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2771 (ME - MB + 1 == SH) && (MB >= 16)) {
2772 // If we are rotating by the exact number of bits as are in the mask
2773 // and the mask is in the least significant bits of the register,
2774 // that's just an andis. (as long as the GPR result has no uses).
2775 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2776 Mask >>= 16;
2777 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2778 }
2779 // If we've set the mask, we can transform.
2780 if (Mask != ~0LLU) {
2781 MI->removeOperand(4);
2782 MI->removeOperand(3);
2783 MI->getOperand(2).setImm(Mask);
2784 NumRcRotatesConvertedToRcAnd++;
2785 }
2786 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2787 int64_t MB = MI->getOperand(3).getImm();
2788 if (MB >= 48) {
2789 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2790 NewOpC = PPC::ANDI8_rec;
2791 MI->removeOperand(3);
2792 MI->getOperand(2).setImm(Mask);
2793 NumRcRotatesConvertedToRcAnd++;
2794 }
2795 }
2796
2797 const MCInstrDesc &NewDesc = get(NewOpC);
2798 MI->setDesc(NewDesc);
2799
2800 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2801 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2802 MI->addOperand(*MI->getParent()->getParent(),
2803 MachineOperand::CreateReg(ImpDef, true, true));
2804 }
2805 }
2806 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2807 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2808 MI->addOperand(*MI->getParent()->getParent(),
2809 MachineOperand::CreateReg(ImpUse, false, true));
2810 }
2811 }
2812 }
2813 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2814 "Record-form instruction does not define cr0?");
2815
2816 // Modify the condition code of operands in OperandsToUpdate.
2817 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2818 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2819 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2820 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2821
2822 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2823 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2824
2825 return true;
2826}
2827
  // This transform is only meaningful after register allocation; in SSA the
  // generic optimizeCompareInstr path handles compare elimination.
  if (MRI->isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Try to optimize the comparison against 0.
  if (CmpValue || !CmpMask || SrcReg2)
    return false;

  // The record forms set the condition register based on a signed comparison
  // with zero (see comments in optimizeCompareInstr). Since we can't do the
  // equality checks in post-RA, we are more restricted on a unsigned
  // comparison.
  unsigned Opc = CmpMI.getOpcode();
  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
    return false;

  // The record forms are always based on a 64-bit comparison on PPC64
  // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
  // comparison. Since we can't do the equality checks in post-RA, we bail out
  // the case.
  if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
    return false;

  // CmpMI can't be deleted if it has implicit def.
  if (CmpMI.hasImplicitDef())
    return false;

  // Locate the instruction in this block that defines the compared register.
  bool SrcRegHasOtherUse = false;
  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
  if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
    return false;

  // Record forms can only set CR0, so the compare must target CR0 too.
  MachineOperand RegMO = CmpMI.getOperand(0);
  Register CRReg = RegMO.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
  bool SeenUseOfCRReg = false;
  bool IsCRRegKilled = false;
  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
                                 SeenUseOfCRReg) ||
      SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
    return false;

  int SrcMIOpc = SrcMI->getOpcode();
  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
  if (NewOpC == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: ");
  LLVM_DEBUG(SrcMI->dump());

  // Morph the defining instruction into its record form so it sets CR0
  // itself; the explicit compare then becomes redundant.
  const MCInstrDesc &NewDesc = get(NewOpC);
  SrcMI->setDesc(NewDesc);
  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
  SrcMI->clearRegisterDeads(CRReg);

  assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
  LLVM_DEBUG(CmpMI.dump());
  return true;
}
2902
    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  // Thin wrapper over the single-base-operand query: PPC memory operands
  // never have scalable offsets.
  const MachineOperand *BaseOp;
  OffsetIsScalable = false;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
    return false;
  BaseOps.push_back(BaseOp);
  return true;
}
2914
2915static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2916 const TargetRegisterInfo *TRI) {
2917 // If this is a volatile load/store, don't mess with it.
2918 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2919 return false;
2920
2921 if (LdSt.getOperand(2).isFI())
2922 return true;
2923
2924 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2925 // Can't cluster if the instruction modifies the base register
2926 // or it is update form. e.g. ld r2,3(r2)
2927 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2928 return false;
2929
2930 return true;
2931}
2932
2933// Only cluster instruction pair that have the same opcode, and they are
2934// clusterable according to PowerPC specification.
2935static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2936 const PPCSubtarget &Subtarget) {
2937 switch (FirstOpc) {
2938 default:
2939 return false;
2940 case PPC::STD:
2941 case PPC::STFD:
2942 case PPC::STXSD:
2943 case PPC::DFSTOREf64:
2944 return FirstOpc == SecondOpc;
2945 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2946 // 32bit and 64bit instruction selection. They are clusterable pair though
2947 // they are different opcode.
2948 case PPC::STW:
2949 case PPC::STW8:
2950 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2951 }
2952}
2953
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // ClusterSize means the number of memory operations that will have been
  // clustered if this hook returns true.
  // Don't cluster memory op if there are already two ops clustered at least.
  if (ClusterSize > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcode according to PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster load/store that have ordered or volatile memory reference.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
               Width2 = LocationSize::precise(0);
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  // Only back-to-back accesses of equal width may be clustered.
  return Offset1 + (int64_t)Width1.getValue() == Offset2;
}
3011
3012/// GetInstSize - Return the number of bytes of code the specified
3013/// instruction may be. This returns the maximum number of bytes.
3014///
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    // Inline-asm size is conservatively estimated from the asm text itself.
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    // Stackmaps/patchpoints reserve exactly the number of patch bytes
    // recorded in their operands.
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    // Ordinary instructions: size comes straight from the MC description.
    return get(Opcode).getSize();
  }
}
3032
std::pair<unsigned, unsigned>
  // PPC always uses a direct mask: there is no mutually-exclusive "bitmask"
  // component, so the second element of the pair is always zero.
  return std::make_pair(TF, 0u);
}
3038
  using namespace PPCII;
  // MIR serialization table: one human-readable name per direct
  // machine-operand target flag.
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PLT, "ppc-plt"},
      {MO_PIC_FLAG, "ppc-pic"},
      {MO_PCREL_FLAG, "ppc-pcrel"},
      {MO_GOT_FLAG, "ppc-got"},
      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
      {MO_TLSGD_FLAG, "ppc-tlsgd"},
      {MO_TPREL_FLAG, "ppc-tprel"},
      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
      {MO_TLSLD_FLAG, "ppc-tlsld"},
      {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
      {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
      {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
      {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
      {MO_LO, "ppc-lo"},
      {MO_HA, "ppc-ha"},
      {MO_TPREL_LO, "ppc-tprel-lo"},
      {MO_TPREL_HA, "ppc-tprel-ha"},
      {MO_DTPREL_LO, "ppc-dtprel-lo"},
      {MO_TLSLD_LO, "ppc-tlsld-lo"},
      {MO_TOC_LO, "ppc-toc-lo"},
      {MO_TLS, "ppc-tls"},
      {MO_PIC_HA_FLAG, "ppc-ha-pic"},
      {MO_PIC_LO_FLAG, "ppc-lo-pic"},
      {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
      {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
      {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
  };
  return ArrayRef(TargetFlags);
}
3072
3073// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3074// The VSX versions have the advantage of a full 64-register target whereas
3075// the FP ones have the advantage of lower latency and higher throughput. So
3076// what we are after is using the faster instructions in low register pressure
3077// situations and using the larger register file in high register pressure
3078// situations.
  // Pick the VSX ("upper", full 64-register file) and FP ("lower", faster)
  // opcode candidates for this memory pseudo.
  unsigned UpperOpcode, LowerOpcode;
  switch (MI.getOpcode()) {
  case PPC::DFLOADf32:
    UpperOpcode = PPC::LXSSP;
    LowerOpcode = PPC::LFS;
    break;
  case PPC::DFLOADf64:
    UpperOpcode = PPC::LXSD;
    LowerOpcode = PPC::LFD;
    break;
  case PPC::DFSTOREf32:
    UpperOpcode = PPC::STXSSP;
    LowerOpcode = PPC::STFS;
    break;
  case PPC::DFSTOREf64:
    UpperOpcode = PPC::STXSD;
    LowerOpcode = PPC::STFD;
    break;
  case PPC::XFLOADf32:
    UpperOpcode = PPC::LXSSPX;
    LowerOpcode = PPC::LFSX;
    break;
  case PPC::XFLOADf64:
    UpperOpcode = PPC::LXSDX;
    LowerOpcode = PPC::LFDX;
    break;
  case PPC::XFSTOREf32:
    UpperOpcode = PPC::STXSSPX;
    LowerOpcode = PPC::STFSX;
    break;
  case PPC::XFSTOREf64:
    UpperOpcode = PPC::STXSDX;
    LowerOpcode = PPC::STFDX;
    break;
  case PPC::LIWAX:
    UpperOpcode = PPC::LXSIWAX;
    LowerOpcode = PPC::LFIWAX;
    break;
  case PPC::LIWZX:
    UpperOpcode = PPC::LXSIWZX;
    LowerOpcode = PPC::LFIWZX;
    break;
  case PPC::STIWX:
    UpperOpcode = PPC::STXSIWX;
    LowerOpcode = PPC::STFIWX;
    break;
  default:
    llvm_unreachable("Unknown Operation!");
  }

  // If RA assigned a register encodable by the FP form (F0-F31 or the
  // overlapping VSL0-VSL31 range), use the lower-latency FP opcode; otherwise
  // fall back to the VSX opcode, which can address the full register file.
  Register TargetReg = MI.getOperand(0).getReg();
  unsigned Opcode;
  if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
      (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
    Opcode = LowerOpcode;
  else
    Opcode = UpperOpcode;
  MI.setDesc(get(Opcode));
  return true;
}
3140
3141static bool isAnImmediateOperand(const MachineOperand &MO) {
3142 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3143}
3144
  // Expand the post-RA pseudos this target defines into real instructions,
  // returning false when MI is not a pseudo handled here.
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      // Each ACC/UACC register covers four consecutive VSL registers.
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC instruction
    // with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    auto M = MBB.getParent()->getFunction().getParent();
    assert(
        (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
        "Only Linux target or tls mode are expected to contain "
        "LOAD_STACK_GUARD");
    // The guard value is loaded from a fixed offset off the thread pointer
    // (X13/R2); tls mode lets the module override that offset.
    int64_t Offset;
    if (M->getStackProtectorGuard() == "tls")
      Offset = M->getStackProtectorGuardOffset();
    else
      Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::PPCLdFixedAddr: {
    assert(Subtarget.getTargetTriple().isOSGlibc() &&
           "Only targets with Glibc expected to contain PPCLdFixedAddr");
    int64_t Offset = 0;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(PPC::LWZ));
    uint64_t FAType = MI.getOperand(1).getImm();
#undef PPC_LNX_FEATURE
#undef PPC_CPU
#define PPC_LNX_DEFINE_OFFSETS
#include "llvm/TargetParser/PPCTargetParser.def"
    // Translate the fixed-address word kind into the glibc TCB offset that
    // matches the target's endianness and word size.
    bool IsLE = Subtarget.isLittleEndian();
    bool Is64 = Subtarget.isPPC64();
    if (FAType == PPC_FAWORD_HWCAP) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_HWCAP2) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_CPUID) {
      if (IsLE)
        Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
    }
    assert(Offset && "Do not know the offset for this fixed addr load");
    MI.removeOperand(1);
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
#define PPC_TGT_PARSER_UNDEF_MACROS
#include "llvm/TargetParser/PPCTargetParser.def"
#undef PPC_TGT_PARSER_UNDEF_MACROS
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::SPILLTOVSR_LD: {
    // Reload a spilled GPR-or-VSR: pick the opcode by the assigned class,
    // recursing so DFLOADf64 gets its own VSX/FP expansion.
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg)) {
      MI.setDesc(get(PPC::DFLOADf64));
      return expandPostRAPseudo(MI);
    }
    else
      MI.setDesc(get(PPC::LD));
    return true;
  }
  case PPC::SPILLTOVSR_ST: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::DFSTOREf64));
      return expandPostRAPseudo(MI);
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STD));
    }
    return true;
  }
  case PPC::SPILLTOVSR_LDX: {
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg))
      MI.setDesc(get(PPC::LXSDX));
    else
      MI.setDesc(get(PPC::LDX));
    return true;
  }
  case PPC::SPILLTOVSR_STX: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::STXSDX));
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STDX));
    }
    return true;
  }

  // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE:
  case PPC::CFENCE8: {
    // Lower CFENCE to: compare Val with itself into CR7, a control
    // dependency on CR7, then morph MI itself into an isync.
    auto Val = MI.getOperand(0).getReg();
    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  return false;
}
3322
3323// Essentially a compile-time implementation of a compare->isel sequence.
3324// It takes two constants to compare, along with the true/false registers
3325// and the comparison type (as a subreg to a CR field) and returns one
3326// of the true/false registers, depending on the comparison results.
3327static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3328 unsigned TrueReg, unsigned FalseReg,
3329 unsigned CRSubReg) {
3330 // Signed comparisons. The immediates are assumed to be sign-extended.
3331 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3332 switch (CRSubReg) {
3333 default: llvm_unreachable("Unknown integer comparison type.");
3334 case PPC::sub_lt:
3335 return Imm1 < Imm2 ? TrueReg : FalseReg;
3336 case PPC::sub_gt:
3337 return Imm1 > Imm2 ? TrueReg : FalseReg;
3338 case PPC::sub_eq:
3339 return Imm1 == Imm2 ? TrueReg : FalseReg;
3340 }
3341 }
3342 // Unsigned comparisons.
3343 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3344 switch (CRSubReg) {
3345 default: llvm_unreachable("Unknown integer comparison type.");
3346 case PPC::sub_lt:
3347 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3348 case PPC::sub_gt:
3349 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3350 case PPC::sub_eq:
3351 return Imm1 == Imm2 ? TrueReg : FalseReg;
3352 }
3353 }
3354 return PPC::NoRegister;
3355}
3356
    unsigned OpNo,
    int64_t Imm) const {
  assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
  // Replace the REG with the Immediate.
  Register InUseReg = MI.getOperand(OpNo).getReg();
  MI.getOperand(OpNo).ChangeToImmediate(Imm);

  // We need to make sure that the MI didn't have any implicit use
  // of this REG any more. We don't call MI.implicit_operands().empty() to
  // return early, since MI's MCID might be changed in calling context, as a
  // result its number of explicit operands may be changed, thus the begin of
  // implicit operand is changed.
  int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
  if (UseOpIdx >= 0) {
    MachineOperand &MO = MI.getOperand(UseOpIdx);
    // Only drop the stale *implicit* use of the replaced register; an
    // explicit use at a different operand index must stay.
    if (MO.isImplicit())
      // The operands must always be in the following order:
      // - explicit reg defs,
      // - other explicit operands (reg uses, immediates, etc.),
      // - implicit reg defs
      // - implicit reg uses
      // Therefore, removing the implicit operand won't change the explicit
      // operands layout.
      MI.removeOperand(UseOpIdx);
  }
}
3385
3386// Replace an instruction with one that materializes a constant (and sets
3387// CR0 if the original instruction was a record-form instruction).
    const LoadImmediateInfo &LII) const {
  // Remove existing operands.
  int OperandToKeep = LII.SetCR ? 1 : 0;
  for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
    MI.removeOperand(i);

  // Replace the instruction.
  if (LII.SetCR) {
    // Record-form request: use ANDI(8)_rec so the new instruction defines
    // CR0, preserving the original record-form semantics.
    MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
    // Set the immediate.
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
    return;
  }
  else
    MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));

  // Set the immediate.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
      .addImm(LII.Imm);
}
3410
    bool &SeenIntermediateUse) const {
  assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
         "Should be called after register allocation.");
  // Walk backwards from MI within its basic block looking for the nearest
  // instruction that writes Reg; flag any intervening read of Reg so the
  // caller knows the def has other users.
  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
  It++;
  SeenIntermediateUse = false;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, TRI))
      return &*It;
    if (It->readsRegister(Reg, TRI))
      SeenIntermediateUse = true;
  }
  // No def of Reg within this block.
  return nullptr;
}
3427
    const DebugLoc &DL, Register Reg,
    int64_t Imm) const {
         "Register should be in non-SSA form after RA");
  bool isPPC64 = Subtarget.isPPC64();
  // FIXME: Materialization here is not optimal.
  // For some special bit patterns we can use less instructions.
  // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
  if (isInt<16>(Imm)) {
    // Fits a sign-extended 16-bit load-immediate.
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
  } else if (isInt<32>(Imm)) {
    // 32-bit: high half via LIS, low half ORed in if nonzero.
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
        .addImm(Imm >> 16);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  } else {
    assert(isPPC64 && "Materializing 64-bit immediate to single register is "
                      "only supported in PPC64");
    // 64-bit: build the upper 32 bits, shift them into place with RLDICR,
    // then OR in each nonzero 16-bit chunk of the lower half.
    BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
    if ((Imm >> 32) & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm((Imm >> 32) & 0xFFFF);
    BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(32)
        .addImm(31);
    BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm((Imm >> 16) & 0xFFFF);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  }
}
3468
// Find a defining load-/add-immediate (LI/LI8/ADDI/ADDI8, plus ADDItocL8
// post-RA) whose result feeds one of MI's register operands; returns that
// def and reports the fed operand index through OpNoForForwarding.
MachineInstr *PPCInstrInfo::getForwardingDefMI(
    unsigned &OpNoForForwarding,
    bool &SeenIntermediateUse) const {
  OpNoForForwarding = ~0U;
  MachineInstr *DefMI = nullptr;
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  // If we're in SSA, get the defs through the MRI. Otherwise, only look
  // within the basic block to see if the register is defined using an
  // LI/LI8/ADDI/ADDI8.
  if (MRI->isSSA()) {
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      if (!MI.getOperand(i).isReg())
        continue;
      Register Reg = MI.getOperand(i).getReg();
      if (!Reg.isVirtual())
        continue;
      Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
      if (TrueReg.isVirtual()) {
        MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
        if (DefMIForTrueReg->getOpcode() == PPC::LI ||
            DefMIForTrueReg->getOpcode() == PPC::LI8 ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
          OpNoForForwarding = i;
          DefMI = DefMIForTrueReg;
          // The ADDI and LI operand maybe exist in one instruction at same
          // time. we prefer to fold LI operand as LI only has one Imm operand
          // and is more possible to be converted. So if current DefMI is
          // ADDI/ADDI8, we continue to find possible LI/LI8.
          if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
            break;
        }
      }
    }
  } else {
    // Looking back through the definition for each operand could be expensive,
    // so exit early if this isn't an instruction that either has an immediate
    // form or is already an immediate form that we can handle.
    ImmInstrInfo III;
    unsigned Opc = MI.getOpcode();
    bool ConvertibleImmForm =
        Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
        Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
        Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
        Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
        Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
        Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
        Opc == PPC::RLWINM8_rec;
    bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
                       ? PPC::isVFRegister(MI.getOperand(0).getReg())
                       : false;
    if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
      return nullptr;

    // Don't convert or %X, %Y, %Y since that's just a register move.
    if ((Opc == PPC::OR || Opc == PPC::OR8) &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return nullptr;
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      MachineOperand &MO = MI.getOperand(i);
      SeenIntermediateUse = false;
      if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
        Register Reg = MI.getOperand(i).getReg();
        // If we see another use of this reg between the def and the MI,
        // we want to flag it so the def isn't deleted.
        MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
        if (DefMI) {
          // Is this register defined by some form of add-immediate (including
          // load-immediate) within this basic block?
          switch (DefMI->getOpcode()) {
          default:
            break;
          case PPC::LI:
          case PPC::LI8:
          case PPC::ADDItocL8:
          case PPC::ADDI:
          case PPC::ADDI8:
            OpNoForForwarding = i;
            return DefMI;
          }
        }
      }
    }
  }
  return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
3557
3558unsigned PPCInstrInfo::getSpillTarget() const {
3559 // With P10, we may need to spill paired vector registers or accumulator
3560 // registers. MMA implies paired vectors, so we can just check that.
3561 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3562 // P11 uses the P10 target.
3563 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3564 2 : Subtarget.hasP9Vector() ?
3565 1 : 0;
3566}
3567
3568ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3569 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3570}
3571
3572ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3573 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3574}
3575
3576// This opt tries to convert the following imm form to an index form to save an
3577// add for stack variables.
3578// Return false if no such pattern found.
3579//
3580// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3581// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3582// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3583//
3584// can be converted to:
3585//
3586// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3587// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3588//
3589// In order to eliminate ADD instr, make sure that:
3590// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3591// new ADDI instr and ADDI can only take int16 Imm.
3592// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3593// between ADDI and ADD instr since its original def in ADDI will be changed
3594// in new ADDI instr. And also there should be no new def for it between
3595// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3596// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3597// between ADD and Imm instr since ADD instr will be eliminated.
3598// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3599// moved to Index instr.
  MachineFunction *MF = MI.getParent()->getParent();
  bool PostRA = !MRI->isSSA();
  // Do this opt after PEI which is after RA. The reason is stack slot expansion
  // in PEI may expose such opportunities since in PEI, stack slot offsets to
  // frame base(OffsetAddi) are determined.
  if (!PostRA)
    return false;
  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;

  // Check if Imm instr meets requirement.
  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
                                    III))
    return false;

  bool OtherIntermediateUse = false;
  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);

  // Exit if there is other use between ADD and Imm instr or no def found.
  if (OtherIntermediateUse || !ADDMI)
    return false;

  // Check if ADD instr meets requirement.
  if (!isADDInstrEligibleForFolding(*ADDMI))
    return false;

  unsigned ScaleRegIdx = 0;
  int64_t OffsetAddi = 0;
  MachineInstr *ADDIMI = nullptr;

  // Check if there is a valid ToBeChangedReg in ADDMI.
  // 1: It must be killed.
  // 2: Its definition must be a valid ADDIMI.
  // 3: It must satify int16 offset requirement.
  // Either ADD operand may be the ADDI result; the other one is ScaleReg.
  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 2;
  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 1;
  else
    return false;

  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
  Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
  Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
  // True if Reg is written to anywhere strictly between Start and End.
  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
    for (auto It = ++Start; It != End; It++)
      if (It->modifiesRegister(Reg, &getRegisterInfo()))
        return true;
    return false;
  };

  // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
  // treated as special zero when ScaleReg is R0/X0 register.
  if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
      (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
    return false;

  // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
  // and Imm Instr.
  if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
    return false;

  // Now start to do the transformation.
  LLVM_DEBUG(dbgs() << "Replace instruction: "
                    << "\n");
  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(ADDMI->dump());
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "with: "
                    << "\n");

  // Update ADDI instr.
  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);

  // Update Imm instr.
  MI.setDesc(get(XFormOpcode));
  MI.getOperand(III.ImmOpNo)
      .ChangeToRegister(ScaleReg, false, false,
                        ADDMI->getOperand(ScaleRegIdx).isKill());

  MI.getOperand(III.OpNoForForwarding)
      .ChangeToRegister(ToBeChangedReg, false, false, true);

  // Eliminate ADD instr.
  ADDMI->eraseFromParent();

  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(MI.dump());

  return true;
}
3696
    int64_t &Imm) const {
  unsigned Opc = ADDIMI.getOpcode();

  // Exit if the instruction is not ADDI.
  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // The operand may not necessarily be an immediate - it could be a relocation.
  if (!ADDIMI.getOperand(2).isImm())
    return false;

  // Hand the concrete ADDI displacement back to the caller.
  Imm = ADDIMI.getOperand(2).getImm();

  return true;
}
3713
  unsigned Opc = ADDMI.getOpcode();

  // Only plain ADD4/ADD8 qualify for the frame-offset fold.
  return Opc == PPC::ADD4 || Opc == PPC::ADD8;
}
3720
                                               unsigned &ToBeDeletedReg,
                                               unsigned &XFormOpcode,
                                               int64_t &OffsetImm,
                                               ImmInstrInfo &III) const {
  // Decide whether the D-form memory access \p MI can participate in the
  // "ADDI + ADD + D-form" folding; on success fill in the register that will
  // become dead, the equivalent X-form opcode, the displacement, and III.
  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned Opc = MI.getOpcode();

  // Map the D-form (reg+imm) opcode to its X-form (reg+reg) counterpart.
  XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);

  // Exit if instruction has no index form.
  if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
    return false;

  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
  if (!instrHasImmForm(XFormOpcode,
                       PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
    return false;

  // The transformation only makes sense when the instruction adds its
  // displacement and base operands.
  if (!III.IsSummingOperands)
    return false;

  MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
  MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
  // Only support imm operands, not relocation slots or others.
  if (!ImmOperand.isImm())
    return false;

  assert(RegOperand.isReg() && "Instruction format is not right");

  // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
  if (!RegOperand.isKill())
    return false;

  ToBeDeletedReg = RegOperand.getReg();
  OffsetImm = ImmOperand.getImm();

  return true;
}
3763
                                         MachineInstr *&ADDIMI,
                                         int64_t &OffsetAddi,
                                         int64_t OffsetImm) const {
  // Check that the register used by operand \p Index of \p ADDMI is produced
  // by a foldable ADDI (found via post-RA def walk, returned in ADDIMI) and
  // that the combined displacement still fits in a signed 16-bit field.
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  MachineOperand &MO = ADDMI->getOperand(Index);

  // The ADDI result must die here, otherwise rewriting its displacement
  // would change the value seen by later users.
  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                 (OffsetAddi + OffsetImm)
  // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)

  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if ADDI instr meets requirement.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  // The folded displacement must still fit in ADDI's signed 16-bit field.
  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
3805
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
                                          SmallSet<Register, 4> &RegsToUpdate,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  bool PostRA = !MRI->isSSA();
  // Starts true so that getForwardingDefMI reports whether any intermediate
  // use was actually observed while walking to the def.
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  // Report the (now possibly dead) def back to the caller for deletion.
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is a imm instruction and its register operands is produced by ADDI,
  // put the imm into imm inst directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simpified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3870
3872 MachineInstr **ToErase) const {
3873 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3874 Register FoldingReg = MI.getOperand(1).getReg();
3875 if (!FoldingReg.isVirtual())
3876 return false;
3877 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3878 if (SrcMI->getOpcode() != PPC::RLWINM &&
3879 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3880 SrcMI->getOpcode() != PPC::RLWINM8 &&
3881 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3882 return false;
3883 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3884 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3885 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3886 "Invalid PPC::RLWINM Instruction!");
3887 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3888 uint64_t SHMI = MI.getOperand(2).getImm();
3889 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3890 uint64_t MBMI = MI.getOperand(3).getImm();
3891 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3892 uint64_t MEMI = MI.getOperand(4).getImm();
3893
3894 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3895 "Invalid PPC::RLWINM Instruction!");
3896 // If MBMI is bigger than MEMI, we always can not get run of ones.
3897 // RotatedSrcMask non-wrap:
3898 // 0........31|32........63
3899 // RotatedSrcMask: B---E B---E
3900 // MaskMI: -----------|--E B------
3901 // Result: ----- --- (Bad candidate)
3902 //
3903 // RotatedSrcMask wrap:
3904 // 0........31|32........63
3905 // RotatedSrcMask: --E B----|--E B----
3906 // MaskMI: -----------|--E B------
3907 // Result: --- -----|--- ----- (Bad candidate)
3908 //
3909 // One special case is RotatedSrcMask is a full set mask.
3910 // RotatedSrcMask full:
3911 // 0........31|32........63
3912 // RotatedSrcMask: ------EB---|-------EB---
3913 // MaskMI: -----------|--E B------
3914 // Result: -----------|--- ------- (Good candidate)
3915
3916 // Mark special case.
3917 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3918
3919 // For other MBMI > MEMI cases, just return.
3920 if ((MBMI > MEMI) && !SrcMaskFull)
3921 return false;
3922
3923 // Handle MBMI <= MEMI cases.
3924 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3925 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3926 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3927 // while in PowerPC ISA, lowerest bit is at index 63.
3928 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3929
3930 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3931 APInt FinalMask = RotatedSrcMask & MaskMI;
3932 uint32_t NewMB, NewME;
3933 bool Simplified = false;
3934
3935 // If final mask is 0, MI result should be 0 too.
3936 if (FinalMask.isZero()) {
3937 bool Is64Bit =
3938 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3939 Simplified = true;
3940 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3941 LLVM_DEBUG(MI.dump());
3942
3943 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3944 // Replace MI with "LI 0"
3945 MI.removeOperand(4);
3946 MI.removeOperand(3);
3947 MI.removeOperand(2);
3948 MI.getOperand(1).ChangeToImmediate(0);
3949 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3950 } else {
3951 // Replace MI with "ANDI_rec reg, 0"
3952 MI.removeOperand(4);
3953 MI.removeOperand(3);
3954 MI.getOperand(2).setImm(0);
3955 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3956 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3957 if (SrcMI->getOperand(1).isKill()) {
3958 MI.getOperand(1).setIsKill(true);
3959 SrcMI->getOperand(1).setIsKill(false);
3960 } else
3961 // About to replace MI.getOperand(1), clear its kill flag.
3962 MI.getOperand(1).setIsKill(false);
3963 }
3964
3965 LLVM_DEBUG(dbgs() << "With: ");
3966 LLVM_DEBUG(MI.dump());
3967
3968 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3969 NewMB <= NewME) ||
3970 SrcMaskFull) {
3971 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3972 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3973 // return a 32 bit value.
3974 Simplified = true;
3975 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3976 LLVM_DEBUG(MI.dump());
3977
3978 uint16_t NewSH = (SHSrc + SHMI) % 32;
3979 MI.getOperand(2).setImm(NewSH);
3980 // If SrcMI mask is full, no need to update MBMI and MEMI.
3981 if (!SrcMaskFull) {
3982 MI.getOperand(3).setImm(NewMB);
3983 MI.getOperand(4).setImm(NewME);
3984 }
3985 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3986 if (SrcMI->getOperand(1).isKill()) {
3987 MI.getOperand(1).setIsKill(true);
3988 SrcMI->getOperand(1).setIsKill(false);
3989 } else
3990 // About to replace MI.getOperand(1), clear its kill flag.
3991 MI.getOperand(1).setIsKill(false);
3992
3993 LLVM_DEBUG(dbgs() << "To: ");
3994 LLVM_DEBUG(MI.dump());
3995 }
3996 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3997 !SrcMI->hasImplicitDef()) {
3998 // If FoldingReg has no non-debug use and it has no implicit def (it
3999 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
4000 // Otherwise keep it.
4001 *ToErase = SrcMI;
4002 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
4003 LLVM_DEBUG(SrcMI->dump());
4004 }
4005 return Simplified;
4006}
4007
/// Determine whether the reg+reg instruction \p Opc has an equivalent reg+imm
/// form, and if so describe it in \p III (imm opcode, operand positions,
/// immediate width/signedness/alignment, special-zero handling, etc.).
/// \p IsVFReg indicates operand 0 is one of the VF registers (only relevant
/// for the Power9 VSX loads/stores) and \p PostRA selects post-RA opcodes
/// where the pre-RA form is a pseudo. Returns false if no imm form exists.
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  // Arithmetic: ADD/ADDC map to their D-form ADDI/ADDIC counterparts.
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  // Comparisons: signed compares take a signed immediate, logical (CMPL*)
  // compares take an unsigned one.
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  // Bitwise logic with unsigned 16-bit immediates.
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  // 32-bit rotates and shifts with a register shift amount.
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  // 64-bit rotates and shifts with a register shift amount.
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  // Update-form loads/stores: the displacement lands in operand 2 and the
  // base register moves to operand 3 (see the comment at the top).
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4428
// Utility function for swapping two arbitrary operands of an instruction.
// MachineInstr has no in-place operand swap, so both operands are removed
// and re-added in the opposite order (shuffling any trailing operands).
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher index first so the lower index stays valid.
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    // NOTE(review): the loop below assumes MinOp > 0; with an unsigned
    // counter, `i >= MinOp` never fails for MinOp == 0 — confirm callers
    // never ask to swap operand 0.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4467
4468// Check if the 'MI' that has the index OpNoForForwarding
4469// meets the requirement described in the ImmInstrInfo.
4470bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4471 const ImmInstrInfo &III,
4472 unsigned OpNoForForwarding
4473 ) const {
4474 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4475 // would not work pre-RA, we can only do the check post RA.
4476 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4477 if (MRI.isSSA())
4478 return false;
4479
4480 // Cannot do the transform if MI isn't summing the operands.
4481 if (!III.IsSummingOperands)
4482 return false;
4483
4484 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4485 if (!III.ZeroIsSpecialOrig)
4486 return false;
4487
4488 // We cannot do the transform if the operand we are trying to replace
4489 // isn't the same as the operand the instruction allows.
4490 if (OpNoForForwarding != III.OpNoForForwarding)
4491 return false;
4492
4493 // Check if the instruction we are trying to transform really has
4494 // the special zero register as its operand.
4495 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4496 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4497 return false;
4498
4499 // This machine instruction is convertible if it is,
4500 // 1. summing the operands.
4501 // 2. one of the operands is special zero register.
4502 // 3. the operand we are trying to replace is allowed by the MI.
4503 return true;
4504}
4505
4506// Check if the DefMI is the add inst and set the ImmMO and RegMO
4507// accordingly.
4508bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4509 const ImmInstrInfo &III,
4510 MachineOperand *&ImmMO,
4511 MachineOperand *&RegMO) const {
4512 unsigned Opc = DefMI.getOpcode();
4513 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4514 return false;
4515
4516 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4517 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4518 // apply for AIX toc-data as well.
4519 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4520 return false;
4521
4522 assert(DefMI.getNumOperands() >= 3 &&
4523 "Add inst must have at least three operands");
4524 RegMO = &DefMI.getOperand(1);
4525 ImmMO = &DefMI.getOperand(2);
4526
4527 // Before RA, ADDI first operand could be a frame index.
4528 if (!RegMO->isReg())
4529 return false;
4530
4531 // This DefMI is elgible for forwarding if it is:
4532 // 1. add inst
4533 // 2. one of the operands is Imm/CPI/Global.
4534 return isAnImmediateOperand(*ImmMO);
4535}
4536
/// Check that the register operand \p RegMO of the add-immediate \p DefMI can
/// be forwarded into \p MI, i.e. there is no intervening redefinition between
/// DefMI and MI. Also reports (through the out-params) whether the register is
/// killed or read in between. Only valid post-RA.
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x -> z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    // Any clobber of Reg by an instruction other than DefMI blocks forwarding.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4577
/// Check that the immediate operand \p ImmMO of \p DefMI can legally be
/// forwarded into an instruction described by \p III; on success the (sign
/// extended, possibly truncated) value plus \p BaseImm is returned in \p Imm.
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    // The hardware field is 16 bits, so normalize through a 16-bit
    // sign-extension before the alignment/truncation checks.
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in ImmInstrInfo.
  return true;
}
4629
4630bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4631 unsigned OpNoForForwarding,
4632 MachineInstr **KilledDef) const {
4633 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4634 !DefMI.getOperand(1).isImm())
4635 return false;
4636
4637 MachineFunction *MF = MI.getParent()->getParent();
4639 bool PostRA = !MRI->isSSA();
4640
4641 int64_t Immediate = DefMI.getOperand(1).getImm();
4642 // Sign-extend to 64-bits.
4643 int64_t SExtImm = SignExtend64<16>(Immediate);
4644
4645 bool ReplaceWithLI = false;
4646 bool Is64BitLI = false;
4647 int64_t NewImm = 0;
4648 bool SetCR = false;
4649 unsigned Opc = MI.getOpcode();
4650 switch (Opc) {
4651 default:
4652 return false;
4653
4654 // FIXME: Any branches conditional on such a comparison can be made
4655 // unconditional. At this time, this happens too infrequently to be worth
4656 // the implementation effort, but if that ever changes, we could convert
4657 // such a pattern here.
4658 case PPC::CMPWI:
4659 case PPC::CMPLWI:
4660 case PPC::CMPDI:
4661 case PPC::CMPLDI: {
4662 // Doing this post-RA would require dataflow analysis to reliably find uses
4663 // of the CR register set by the compare.
4664 // No need to fixup killed/dead flag since this transformation is only valid
4665 // before RA.
4666 if (PostRA)
4667 return false;
4668 // If a compare-immediate is fed by an immediate and is itself an input of
4669 // an ISEL (the most common case) into a COPY of the correct register.
4670 bool Changed = false;
4671 Register DefReg = MI.getOperand(0).getReg();
4672 int64_t Comparand = MI.getOperand(2).getImm();
4673 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4674 ? (Comparand | 0xFFFFFFFFFFFF0000)
4675 : Comparand;
4676
4677 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4678 unsigned UseOpc = CompareUseMI.getOpcode();
4679 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4680 continue;
4681 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4682 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4683 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4684 unsigned RegToCopy =
4685 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4686 if (RegToCopy == PPC::NoRegister)
4687 continue;
4688 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4689 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4690 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4691 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4692 CompareUseMI.removeOperand(3);
4693 CompareUseMI.removeOperand(2);
4694 continue;
4695 }
4696 LLVM_DEBUG(
4697 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4698 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4699 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4700 // Convert to copy and remove unneeded operands.
4701 CompareUseMI.setDesc(get(PPC::COPY));
4702 CompareUseMI.removeOperand(3);
4703 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4704 CmpIselsConverted++;
4705 Changed = true;
4706 LLVM_DEBUG(CompareUseMI.dump());
4707 }
4708 if (Changed)
4709 return true;
4710 // This may end up incremented multiple times since this function is called
4711 // during a fixed-point transformation, but it is only meant to indicate the
4712 // presence of this opportunity.
4713 MissedConvertibleImmediateInstrs++;
4714 return false;
4715 }
4716
4717 // Immediate forms - may simply be convertable to an LI.
4718 case PPC::ADDI:
4719 case PPC::ADDI8: {
4720 // Does the sum fit in a 16-bit signed field?
4721 int64_t Addend = MI.getOperand(2).getImm();
4722 if (isInt<16>(Addend + SExtImm)) {
4723 ReplaceWithLI = true;
4724 Is64BitLI = Opc == PPC::ADDI8;
4725 NewImm = Addend + SExtImm;
4726 break;
4727 }
4728 return false;
4729 }
4730 case PPC::SUBFIC:
4731 case PPC::SUBFIC8: {
4732 // Only transform this if the CARRY implicit operand is dead.
4733 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4734 return false;
4735 int64_t Minuend = MI.getOperand(2).getImm();
4736 if (isInt<16>(Minuend - SExtImm)) {
4737 ReplaceWithLI = true;
4738 Is64BitLI = Opc == PPC::SUBFIC8;
4739 NewImm = Minuend - SExtImm;
4740 break;
4741 }
4742 return false;
4743 }
4744 case PPC::RLDICL:
4745 case PPC::RLDICL_rec:
4746 case PPC::RLDICL_32:
4747 case PPC::RLDICL_32_64: {
4748 // Use APInt's rotate function.
4749 int64_t SH = MI.getOperand(2).getImm();
4750 int64_t MB = MI.getOperand(3).getImm();
4751 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4752 SExtImm, true);
4753 InVal = InVal.rotl(SH);
4754 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4755 InVal &= Mask;
4756 // Can't replace negative values with an LI as that will sign-extend
4757 // and not clear the left bits. If we're setting the CR bit, we will use
4758 // ANDI_rec which won't sign extend, so that's safe.
4759 if (isUInt<15>(InVal.getSExtValue()) ||
4760 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4761 ReplaceWithLI = true;
4762 Is64BitLI = Opc != PPC::RLDICL_32;
4763 NewImm = InVal.getSExtValue();
4764 SetCR = Opc == PPC::RLDICL_rec;
4765 break;
4766 }
4767 return false;
4768 }
4769 case PPC::RLWINM:
4770 case PPC::RLWINM8:
4771 case PPC::RLWINM_rec:
4772 case PPC::RLWINM8_rec: {
4773 int64_t SH = MI.getOperand(2).getImm();
4774 int64_t MB = MI.getOperand(3).getImm();
4775 int64_t ME = MI.getOperand(4).getImm();
4776 APInt InVal(32, SExtImm, true);
4777 InVal = InVal.rotl(SH);
4778 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4779 InVal &= Mask;
4780 // Can't replace negative values with an LI as that will sign-extend
4781 // and not clear the left bits. If we're setting the CR bit, we will use
4782 // ANDI_rec which won't sign extend, so that's safe.
4783 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4784 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4785 isUInt<16>(InVal.getSExtValue()));
4786 if (ValueFits) {
4787 ReplaceWithLI = true;
4788 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4789 NewImm = InVal.getSExtValue();
4790 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4791 break;
4792 }
4793 return false;
4794 }
4795 case PPC::ORI:
4796 case PPC::ORI8:
4797 case PPC::XORI:
4798 case PPC::XORI8: {
4799 int64_t LogicalImm = MI.getOperand(2).getImm();
4800 int64_t Result = 0;
4801 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4802 Result = LogicalImm | SExtImm;
4803 else
4804 Result = LogicalImm ^ SExtImm;
4805 if (isInt<16>(Result)) {
4806 ReplaceWithLI = true;
4807 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4808 NewImm = Result;
4809 break;
4810 }
4811 return false;
4812 }
4813 }
4814
4815 if (ReplaceWithLI) {
4816 // We need to be careful with CR-setting instructions we're replacing.
4817 if (SetCR) {
4818 // We don't know anything about uses when we're out of SSA, so only
4819 // replace if the new immediate will be reproduced.
4820 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4821 if (PostRA && ImmChanged)
4822 return false;
4823
4824 if (!PostRA) {
4825 // If the defining load-immediate has no other uses, we can just replace
4826 // the immediate with the new immediate.
4827 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4828 DefMI.getOperand(1).setImm(NewImm);
4829
4830 // If we're not using the GPR result of the CR-setting instruction, we
4831 // just need to and with zero/non-zero depending on the new immediate.
4832 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4833 if (NewImm) {
4834 assert(Immediate && "Transformation converted zero to non-zero?");
4835 NewImm = Immediate;
4836 }
4837 } else if (ImmChanged)
4838 return false;
4839 }
4840 }
4841
4842 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4843 LLVM_DEBUG(MI.dump());
4844 LLVM_DEBUG(dbgs() << "Fed by:\n");
4845 LLVM_DEBUG(DefMI.dump());
4847 LII.Imm = NewImm;
4848 LII.Is64Bit = Is64BitLI;
4849 LII.SetCR = SetCR;
4850 // If we're setting the CR, the original load-immediate must be kept (as an
4851 // operand to ANDI_rec/ANDI8_rec).
4852 if (KilledDef && SetCR)
4853 *KilledDef = nullptr;
4854 replaceInstrWithLI(MI, LII);
4855
4856 if (PostRA)
4857 recomputeLivenessFlags(*MI.getParent());
4858
4859 LLVM_DEBUG(dbgs() << "With:\n");
4860 LLVM_DEBUG(MI.dump());
4861 return true;
4862 }
4863 return false;
4864}
4865
4866bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4867 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4868 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4869 bool PostRA = !MRI->isSSA();
4870 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4871 // for post-ra.
4872 if (PostRA)
4873 return false;
4874
4875 // Only handle load/store.
4876 if (!MI.mayLoadOrStore())
4877 return false;
4878
4879 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4880
4881 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4882 "MI must have x-form opcode");
4883
4884 // get Imm Form info.
4885 ImmInstrInfo III;
4886 bool IsVFReg = MI.getOperand(0).isReg()
4887 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4888 : false;
4889
4890 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4891 return false;
4892
4893 if (!III.IsSummingOperands)
4894 return false;
4895
4896 if (OpNoForForwarding != III.OpNoForForwarding)
4897 return false;
4898
4899 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4900 if (!ImmOperandMI.isImm())
4901 return false;
4902
4903 // Check DefMI.
4904 MachineOperand *ImmMO = nullptr;
4905 MachineOperand *RegMO = nullptr;
4906 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4907 return false;
4908 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4909
4910 // Check Imm.
4911 // Set ImmBase from imm instruction as base and get new Imm inside
4912 // isImmElgibleForForwarding.
4913 int64_t ImmBase = ImmOperandMI.getImm();
4914 int64_t Imm = 0;
4915 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4916 return false;
4917
4918 // Do the transform
4919 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4920 LLVM_DEBUG(MI.dump());
4921 LLVM_DEBUG(dbgs() << "Fed by:\n");
4922 LLVM_DEBUG(DefMI.dump());
4923
4924 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4925 MI.getOperand(III.ImmOpNo).setImm(Imm);
4926
4927 LLVM_DEBUG(dbgs() << "With:\n");
4928 LLVM_DEBUG(MI.dump());
4929 return true;
4930}
4931
4932// If an X-Form instruction is fed by an add-immediate and one of its operands
4933// is the literal zero, attempt to forward the source of the add-immediate to
4934// the corresponding D-Form instruction with the displacement coming from
4935// the immediate being added.
4936bool PPCInstrInfo::transformToImmFormFedByAdd(
4937 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4938 MachineInstr &DefMI, bool KillDefMI) const {
4939 // RegMO ImmMO
4940 // | |
4941 // x = addi reg, imm <----- DefMI
4942 // y = op 0 , x <----- MI
4943 // |
4944 // OpNoForForwarding
4945 // Check if the MI meet the requirement described in the III.
4946 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4947 return false;
4948
4949 // Check if the DefMI meet the requirement
4950 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4951 MachineOperand *ImmMO = nullptr;
4952 MachineOperand *RegMO = nullptr;
4953 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4954 return false;
4955 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4956
4957 // As we get the Imm operand now, we need to check if the ImmMO meet
4958 // the requirement described in the III. If yes set the Imm.
4959 int64_t Imm = 0;
4960 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4961 return false;
4962
4963 bool IsFwdFeederRegKilled = false;
4964 bool SeenIntermediateUse = false;
4965 // Check if the RegMO can be forwarded to MI.
4966 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4967 IsFwdFeederRegKilled, SeenIntermediateUse))
4968 return false;
4969
4970 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4971 bool PostRA = !MRI.isSSA();
4972
4973 // We know that, the MI and DefMI both meet the pattern, and
4974 // the Imm also meet the requirement with the new Imm-form.
4975 // It is safe to do the transformation now.
4976 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4977 LLVM_DEBUG(MI.dump());
4978 LLVM_DEBUG(dbgs() << "Fed by:\n");
4979 LLVM_DEBUG(DefMI.dump());
4980
4981 // Update the base reg first.
4982 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4983 false, false,
4984 RegMO->isKill());
4985
4986 // Then, update the imm.
4987 if (ImmMO->isImm()) {
4988 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4989 // directly.
4991 }
4992 else {
4993 // Otherwise, it is Constant Pool Index(CPI) or Global,
4994 // which is relocation in fact. We need to replace the special zero
4995 // register with ImmMO.
4996 // Before that, we need to fixup the target flags for imm.
4997 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4998 if (DefMI.getOpcode() == PPC::ADDItocL8)
5000
5001 // MI didn't have the interface such as MI.setOperand(i) though
5002 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
5003 // ImmMO, we need to remove ZERO operand and all the operands behind it,
5004 // and, add the ImmMO, then, move back all the operands behind ZERO.
5006 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
5007 MOps.push_back(MI.getOperand(i));
5008 MI.removeOperand(i);
5009 }
5010
5011 // Remove the last MO in the list, which is ZERO operand in fact.
5012 MOps.pop_back();
5013 // Add the imm operand.
5014 MI.addOperand(*ImmMO);
5015 // Now add the rest back.
5016 for (auto &MO : MOps)
5017 MI.addOperand(MO);
5018 }
5019
5020 // Update the opcode.
5021 MI.setDesc(get(III.ImmOpcode));
5022
5023 if (PostRA)
5024 recomputeLivenessFlags(*MI.getParent());
5025 LLVM_DEBUG(dbgs() << "With:\n");
5026 LLVM_DEBUG(MI.dump());
5027
5028 return true;
5029}
5030
5031bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
5032 const ImmInstrInfo &III,
5033 unsigned ConstantOpNo,
5034 MachineInstr &DefMI) const {
5035 // DefMI must be LI or LI8.
5036 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
5037 !DefMI.getOperand(1).isImm())
5038 return false;
5039
5040 // Get Imm operand and Sign-extend to 64-bits.
5041 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
5042
5043 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5044 bool PostRA = !MRI.isSSA();
5045 // Exit early if we can't convert this.
5046 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
5047 return false;
5048 if (Imm % III.ImmMustBeMultipleOf)
5049 return false;
5050 if (III.TruncateImmTo)
5051 Imm &= ((1 << III.TruncateImmTo) - 1);
5052 if (III.SignedImm) {
5053 APInt ActualValue(64, Imm, true);
5054 if (!ActualValue.isSignedIntN(III.ImmWidth))
5055 return false;
5056 } else {
5057 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
5058 if ((uint64_t)Imm > UnsignedMax)
5059 return false;
5060 }
5061
5062 // If we're post-RA, the instructions don't agree on whether register zero is
5063 // special, we can transform this as long as the register operand that will
5064 // end up in the location where zero is special isn't R0.
5065 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5066 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
5067 III.ZeroIsSpecialNew + 1;
5068 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
5069 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5070 // If R0 is in the operand where zero is special for the new instruction,
5071 // it is unsafe to transform if the constant operand isn't that operand.
5072 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
5073 ConstantOpNo != III.ZeroIsSpecialNew)
5074 return false;
5075 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5076 ConstantOpNo != PosForOrigZero)
5077 return false;
5078 }
5079
5080 unsigned Opc = MI.getOpcode();
5081 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5082 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5083 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5084 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5085 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5086 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5087 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5088 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5089 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5090 Opc == PPC::SRD_rec;
5091
5092 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5093 LLVM_DEBUG(MI.dump());
5094 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5095 LLVM_DEBUG(DefMI.dump());
5096 MI.setDesc(get(III.ImmOpcode));
5097 if (ConstantOpNo == III.OpNoForForwarding) {
5098 // Converting shifts to immediate form is a bit tricky since they may do
5099 // one of three things:
5100 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5101 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5102 // setting CR0)
5103 // 3. If the shift amount is in [1, OpSize), it's just a shift
5104 if (SpecialShift32 || SpecialShift64) {
5106 LII.Imm = 0;
5107 LII.SetCR = SetCR;
5108 LII.Is64Bit = SpecialShift64;
5109 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5110 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5111 replaceInstrWithLI(MI, LII);
5112 // Shifts by zero don't change the value. If we don't need to set CR0,
5113 // just convert this to a COPY. Can't do this post-RA since we've already
5114 // cleaned up the copies.
5115 else if (!SetCR && ShAmt == 0 && !PostRA) {
5116 MI.removeOperand(2);
5117 MI.setDesc(get(PPC::COPY));
5118 } else {
5119 // The 32 bit and 64 bit instructions are quite different.
5120 if (SpecialShift32) {
5121 // Left shifts use (N, 0, 31-N).
5122 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5123 // use (0, 0, 31) if N == 0.
5124 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5125 uint64_t MB = RightShift ? ShAmt : 0;
5126 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
5128 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5129 .addImm(ME);
5130 } else {
5131 // Left shifts use (N, 63-N).
5132 // Right shifts use (64-N, N) if 0 < N < 64.
5133 // use (0, 0) if N == 0.
5134 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5135 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
5137 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5138 }
5139 }
5140 } else
5141 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5142 }
5143 // Convert commutative instructions (switch the operands and convert the
5144 // desired one to an immediate.
5145 else if (III.IsCommutative) {
5146 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5147 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5148 } else
5149 llvm_unreachable("Should have exited early!");
5150
5151 // For instructions for which the constant register replaces a different
5152 // operand than where the immediate goes, we need to swap them.
5153 if (III.OpNoForForwarding != III.ImmOpNo)
5155
5156 // If the special R0/X0 register index are different for original instruction
5157 // and new instruction, we need to fix up the register class in new
5158 // instruction.
5159 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5160 if (III.ZeroIsSpecialNew) {
5161 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5162 // need to fix up register class.
5163 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5164 if (RegToModify.isVirtual()) {
5165 const TargetRegisterClass *NewRC =
5166 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5167 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5168 MRI.setRegClass(RegToModify, NewRC);
5169 }
5170 }
5171 }
5172
5173 if (PostRA)
5174 recomputeLivenessFlags(*MI.getParent());
5175
5176 LLVM_DEBUG(dbgs() << "With: ");
5177 LLVM_DEBUG(MI.dump());
5178 LLVM_DEBUG(dbgs() << "\n");
5179 return true;
5180}
5181
5182const TargetRegisterClass *
5184 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5185 return &PPC::VSRCRegClass;
5186 return RC;
5187}
5188
5190 return PPC::getRecordFormOpcode(Opcode);
5191}
5192
5193static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5194 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5195 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5196 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5197 Opcode == PPC::LHZUX8);
5198}
5199
5200// This function checks for sign extension from 32 bits to 64 bits.
5201static bool definedBySignExtendingOp(const unsigned Reg,
5202 const MachineRegisterInfo *MRI) {
5204 return false;
5205
5206 MachineInstr *MI = MRI->getVRegDef(Reg);
5207 if (!MI)
5208 return false;
5209
5210 int Opcode = MI->getOpcode();
5211 const PPCInstrInfo *TII =
5212 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5213 if (TII->isSExt32To64(Opcode))
5214 return true;
5215
5216 // The first def of LBZU/LHZU is sign extended.
5217 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5218 return true;
5219
5220 // RLDICL generates sign-extended output if it clears at least
5221 // 33 bits from the left (MSB).
5222 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5223 return true;
5224
5225 // If at least one bit from left in a lower word is masked out,
5226 // all of 0 to 32-th bits of the output are cleared.
5227 // Hence the output is already sign extended.
5228 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5229 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5230 MI->getOperand(3).getImm() > 0 &&
5231 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5232 return true;
5233
5234 // If the most significant bit of immediate in ANDIS is zero,
5235 // all of 0 to 32-th bits are cleared.
5236 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5237 uint16_t Imm = MI->getOperand(2).getImm();
5238 if ((Imm & 0x8000) == 0)
5239 return true;
5240 }
5241
5242 return false;
5243}
5244
5245// This function checks the machine instruction that defines the input register
5246// Reg. If that machine instruction always outputs a value that has only zeros
5247// in the higher 32 bits then this function will return true.
5248static bool definedByZeroExtendingOp(const unsigned Reg,
5249 const MachineRegisterInfo *MRI) {
5251 return false;
5252
5253 MachineInstr *MI = MRI->getVRegDef(Reg);
5254 if (!MI)
5255 return false;
5256
5257 int Opcode = MI->getOpcode();
5258 const PPCInstrInfo *TII =
5259 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5260 if (TII->isZExt32To64(Opcode))
5261 return true;
5262
5263 // The first def of LBZU/LHZU/LWZU are zero extended.
5264 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5265 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5266 MI->getOperand(0).getReg() == Reg)
5267 return true;
5268
5269 // The 16-bit immediate is sign-extended in li/lis.
5270 // If the most significant bit is zero, all higher bits are zero.
5271 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5272 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5273 int64_t Imm = MI->getOperand(1).getImm();
5274 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5275 return true;
5276 }
5277
5278 // We have some variations of rotate-and-mask instructions
5279 // that clear higher 32-bits.
5280 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5281 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5282 Opcode == PPC::RLDICL_32_64) &&
5283 MI->getOperand(3).getImm() >= 32)
5284 return true;
5285
5286 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5287 MI->getOperand(3).getImm() >= 32 &&
5288 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5289 return true;
5290
5291 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5292 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5293 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5294 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5295 return true;
5296
5297 return false;
5298}
5299
5300// This function returns true if the input MachineInstr is a TOC save
5301// instruction.
5303 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5304 return false;
5305 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5306 unsigned StackOffset = MI.getOperand(1).getImm();
5307 Register StackReg = MI.getOperand(2).getReg();
5308 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5309 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5310 return true;
5311
5312 return false;
5313}
5314
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
constexpr unsigned MAX_BINOP_DEPTH = 1;
5318
5319// This function will promote the instruction which defines the register `Reg`
5320// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5321// used to check whether an instruction needs to be promoted or not is similar
5322// to the logic used to check whether or not a defined register is sign or zero
5323// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5324// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5325// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5326// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5327// than once. This is done to prevent exponential recursion.
5330 unsigned BinOpDepth,
5331 LiveVariables *LV) const {
5332 if (!Reg.isVirtual())
5333 return;
5334
5335 MachineInstr *MI = MRI->getVRegDef(Reg);
5336 if (!MI)
5337 return;
5338
5339 unsigned Opcode = MI->getOpcode();
5340
5341 switch (Opcode) {
5342 case PPC::OR:
5343 case PPC::ISEL:
5344 case PPC::OR8:
5345 case PPC::PHI: {
5346 if (BinOpDepth >= MAX_BINOP_DEPTH)
5347 break;
5348 unsigned OperandEnd = 3, OperandStride = 1;
5349 if (Opcode == PPC::PHI) {
5350 OperandEnd = MI->getNumOperands();
5351 OperandStride = 2;
5352 }
5353
5354 for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5355 assert(MI->getOperand(I).isReg() && "Operand must be register");
5356 promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5357 BinOpDepth + 1, LV);
5358 }
5359
5360 break;
5361 }
5362 case PPC::COPY: {
5363 // Refers to the logic of the `case PPC::COPY` statement in the function
5364 // PPCInstrInfo::isSignOrZeroExtended().
5365
5366 Register SrcReg = MI->getOperand(1).getReg();
5367 // In both ELFv1 and v2 ABI, method parameters and the return value
5368 // are sign- or zero-extended.
5369 const MachineFunction *MF = MI->getMF();
5370 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5371 // If this is a copy from another register, we recursively promote the
5372 // source.
5373 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5374 return;
5375 }
5376
5377 // From here on everything is SVR4ABI. COPY will be eliminated in the other
5378 // pass, we do not need promote the COPY pseudo opcode.
5379
5380 if (SrcReg != PPC::X3)
5381 // If this is a copy from another register, we recursively promote the
5382 // source.
5383 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5384 return;
5385 }
5386 case PPC::ORI:
5387 case PPC::XORI:
5388 case PPC::ORIS:
5389 case PPC::XORIS:
5390 case PPC::ORI8:
5391 case PPC::XORI8:
5392 case PPC::ORIS8:
5393 case PPC::XORIS8:
5394 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5395 LV);
5396 break;
5397 case PPC::AND:
5398 case PPC::AND8:
5399 if (BinOpDepth >= MAX_BINOP_DEPTH)
5400 break;
5401
5402 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5403 BinOpDepth + 1, LV);
5404 promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5405 BinOpDepth + 1, LV);
5406 break;
5407 }
5408
5409 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5410 if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5411 return;
5412
5413 const PPCInstrInfo *TII =
5414 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5415
5416 // Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5417 // extended themselves, but may have operands who's destination registers of
5418 // signed or zero extended instructions.
5419 std::unordered_map<unsigned, unsigned> OpcodeMap = {
5420 {PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5421 {PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5422 {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5423 {PPC::AND, PPC::AND8}};
5424
5425 int NewOpcode = -1;
5426 auto It = OpcodeMap.find(Opcode);
5427 if (It != OpcodeMap.end()) {
5428 // Set the new opcode to the mapped 64-bit version.
5429 NewOpcode = It->second;
5430 } else {
5431 if (!TII->isSExt32To64(Opcode))
5432 return;
5433
5434 // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5435 // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5436 // instruction with the same opcode.
5437 NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5438 }
5439
5440 assert(NewOpcode != -1 &&
5441 "Must have a 64-bit opcode to map the 32-bit opcode!");
5442
5443 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5444 const MCInstrDesc &MCID = TII->get(NewOpcode);
5445 const TargetRegisterClass *NewRC =
5446 TRI->getRegClass(MCID.operands()[0].RegClass);
5447
5448 Register SrcReg = MI->getOperand(0).getReg();
5449 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5450
5451 // If the register class of the defined register in the 32-bit instruction
5452 // is the same as the register class of the defined register in the promoted
5453 // 64-bit instruction, we do not need to promote the instruction.
5454 if (NewRC == SrcRC)
5455 return;
5456
5457 DebugLoc DL = MI->getDebugLoc();
5458 auto MBB = MI->getParent();
5459
5460 // Since the pseudo-opcode of the instruction is promoted from 32-bit to
5461 // 64-bit, if the source reg class of the original instruction belongs to
5462 // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5463 // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
5464 // respectively.
5465 DenseMap<unsigned, Register> PromoteRegs;
5466 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5467 MachineOperand &Operand = MI->getOperand(i);
5468 if (!Operand.isReg())
5469 continue;
5470
5471 Register OperandReg = Operand.getReg();
5472 if (!OperandReg.isVirtual())
5473 continue;
5474
5475 const TargetRegisterClass *NewUsedRegRC =
5476 TRI->getRegClass(MCID.operands()[i].RegClass);
5477 const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5478 if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5479 OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5480 // Promote the used 32-bit register to 64-bit register.
5481 Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5482 Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5483 BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5484 BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5485 .addReg(TmpReg)
5486 .addReg(OperandReg)
5487 .addImm(PPC::sub_32);
5488 PromoteRegs[i] = DstTmpReg;
5489 }
5490 }
5491
5492 Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5493
5494 BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
5496 --Iter;
5497 MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
5498 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5499 if (auto It = PromoteRegs.find(i); It != PromoteRegs.end())
5500 MIBuilder.addReg(It->second, RegState::Kill);
5501 else
5502 Iter->addOperand(MI->getOperand(i));
5503 }
5504
5505 for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5506 MachineOperand &Operand = Iter->getOperand(i);
5507 if (!Operand.isReg())
5508 continue;
5509 Register OperandReg = Operand.getReg();
5510 if (!OperandReg.isVirtual())
5511 continue;
5512 LV->recomputeForSingleDefVirtReg(OperandReg);
5513 }
5514
5515 MI->eraseFromParent();
5516
5517 // A defined register may be used by other instructions that are 32-bit.
5518 // After the defined register is promoted to 64-bit for the promoted
5519 // instruction, we need to demote the 64-bit defined register back to a
5520 // 32-bit register
5521 BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5522 .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5523 LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5524}
5525
// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
// prevent exponential recursion. There is no parameter to track linear
// recursion.
5531std::pair<bool, bool>
5533 const unsigned BinOpDepth,
5534 const MachineRegisterInfo *MRI) const {
5536 return std::pair<bool, bool>(false, false);
5537
5538 MachineInstr *MI = MRI->getVRegDef(Reg);
5539 if (!MI)
5540 return std::pair<bool, bool>(false, false);
5541
5542 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5543 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5544
5545 // If we know the instruction always returns sign- and zero-extended result,
5546 // return here.
5547 if (IsSExt && IsZExt)
5548 return std::pair<bool, bool>(IsSExt, IsZExt);
5549
5550 switch (MI->getOpcode()) {
5551 case PPC::COPY: {
5552 Register SrcReg = MI->getOperand(1).getReg();
5553
5554 // In both ELFv1 and v2 ABI, method parameters and the return value
5555 // are sign- or zero-extended.
5556 const MachineFunction *MF = MI->getMF();
5557
5558 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5559 // If this is a copy from another register, we recursively check source.
5560 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5561 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5562 SrcExt.second || IsZExt);
5563 }
5564
5565 // From here on everything is SVR4ABI
5566 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5567 // We check the ZExt/SExt flags for a method parameter.
5568 if (MI->getParent()->getBasicBlock() ==
5569 &MF->getFunction().getEntryBlock()) {
5570 Register VReg = MI->getOperand(0).getReg();
5571 if (MF->getRegInfo().isLiveIn(VReg)) {
5572 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5573 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5574 return std::pair<bool, bool>(IsSExt, IsZExt);
5575 }
5576 }
5577
5578 if (SrcReg != PPC::X3) {
5579 // If this is a copy from another register, we recursively check source.
5580 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5581 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5582 SrcExt.second || IsZExt);
5583 }
5584
5585 // For a method return value, we check the ZExt/SExt flags in attribute.
5586 // We assume the following code sequence for method call.
5587 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5588 // BL8_NOP @func,...
5589 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5590 // %5 = COPY %x3; G8RC:%5
5591 const MachineBasicBlock *MBB = MI->getParent();
5592 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5595 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5596 return IsExtendPair;
5597
5598 const MachineInstr &CallMI = *(--II);
5599 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5600 return IsExtendPair;
5601
5602 const Function *CalleeFn =
5603 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5604 if (!CalleeFn)
5605 return IsExtendPair;
5606 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5607 if (IntTy && IntTy->getBitWidth() <= 32) {
5608 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5609 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5610 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5611 return std::pair<bool, bool>(IsSExt, IsZExt);
5612 }
5613
5614 return IsExtendPair;
5615 }
5616
5617 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5618 // So, we track the operand register as we do for register copy.
5619 case PPC::ORI:
5620 case PPC::XORI:
5621 case PPC::ORI8:
5622 case PPC::XORI8: {
5623 Register SrcReg = MI->getOperand(1).getReg();
5624 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5625 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5626 SrcExt.second || IsZExt);
5627 }
5628
5629 // OR, XOR with shifted 16-bit immediate does not change the upper
5630 // 32 bits. So, we track the operand register for zero extension.
5631 // For sign extension when the MSB of the immediate is zero, we also
5632 // track the operand register since the upper 33 bits are unchanged.
5633 case PPC::ORIS:
5634 case PPC::XORIS:
5635 case PPC::ORIS8:
5636 case PPC::XORIS8: {
5637 Register SrcReg = MI->getOperand(1).getReg();
5638 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5639 uint16_t Imm = MI->getOperand(2).getImm();
5640 if (Imm & 0x8000)
5641 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5642 else
5643 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5644 SrcExt.second || IsZExt);
5645 }
5646
5647 // If all incoming values are sign-/zero-extended,
5648 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5649 case PPC::OR:
5650 case PPC::OR8:
5651 case PPC::ISEL:
5652 case PPC::PHI: {
5653 if (BinOpDepth >= MAX_BINOP_DEPTH)
5654 return std::pair<bool, bool>(false, false);
5655
5656 // The input registers for PHI are operand 1, 3, ...
5657 // The input registers for others are operand 1 and 2.
5658 unsigned OperandEnd = 3, OperandStride = 1;
5659 if (MI->getOpcode() == PPC::PHI) {
5660 OperandEnd = MI->getNumOperands();
5661 OperandStride = 2;
5662 }
5663
5664 IsSExt = true;
5665 IsZExt = true;
5666 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5667 if (!MI->getOperand(I).isReg())
5668 return std::pair<bool, bool>(false, false);
5669
5670 Register SrcReg = MI->getOperand(I).getReg();
5671 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5672 IsSExt &= SrcExt.first;
5673 IsZExt &= SrcExt.second;
5674 }
5675 return std::pair<bool, bool>(IsSExt, IsZExt);
5676 }
5677
5678 // If at least one of the incoming values of an AND is zero extended
5679 // then the output is also zero-extended. If both of the incoming values
5680 // are sign-extended then the output is also sign extended.
5681 case PPC::AND:
5682 case PPC::AND8: {
5683 if (BinOpDepth >= MAX_BINOP_DEPTH)
5684 return std::pair<bool, bool>(false, false);
5685
5686 Register SrcReg1 = MI->getOperand(1).getReg();
5687 Register SrcReg2 = MI->getOperand(2).getReg();
5688 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5689 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5690 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5691 Src1Ext.second || Src2Ext.second);
5692 }
5693
5694 default:
5695 break;
5696 }
5697 return std::pair<bool, bool>(IsSExt, IsZExt);
5698}
5699
5700bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5701 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5702}
5703
5704namespace {
5705class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5706 MachineInstr *Loop, *EndLoop, *LoopCount;
5707 MachineFunction *MF;
5708 const TargetInstrInfo *TII;
5709 int64_t TripCount;
5710
5711public:
5712 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5713 MachineInstr *LoopCount)
5714 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5715 MF(Loop->getParent()->getParent()),
5716 TII(MF->getSubtarget().getInstrInfo()) {
5717 // Inspect the Loop instruction up-front, as it may be deleted when we call
5718 // createTripCountGreaterCondition.
5719 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5720 TripCount = LoopCount->getOperand(1).getImm();
5721 else
5722 TripCount = -1;
5723 }
5724
5725 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5726 // Only ignore the terminator.
5727 return MI == EndLoop;
5728 }
5729
5730 std::optional<bool> createTripCountGreaterCondition(
5731 int TC, MachineBasicBlock &MBB,
5733 if (TripCount == -1) {
5734 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5735 // so we don't need to generate any thing here.
5736 Cond.push_back(MachineOperand::CreateImm(0));
5738 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5739 true));
5740 return {};
5741 }
5742
5743 return TripCount > TC;
5744 }
5745
5746 void setPreheader(MachineBasicBlock *NewPreheader) override {
5747 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5748 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5749 }
5750
5751 void adjustTripCount(int TripCountAdjust) override {
5752 // If the loop trip count is a compile-time value, then just change the
5753 // value.
5754 if (LoopCount->getOpcode() == PPC::LI8 ||
5755 LoopCount->getOpcode() == PPC::LI) {
5756 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5757 LoopCount->getOperand(1).setImm(TripCount);
5758 return;
5759 }
5760
5761 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5762 // so we don't need to generate any thing here.
5763 }
5764
5765 void disposed(LiveIntervals *LIS) override {
5766 if (LIS) {
5768 LIS->RemoveMachineInstrFromMaps(*LoopCount);
5769 }
5770 Loop->eraseFromParent();
5771 // Ensure the loop setup instruction is deleted too.
5772 LoopCount->eraseFromParent();
5773 }
5774};
5775} // namespace
5776
5777std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5779 // We really "analyze" only hardware loops right now.
5781 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5782 if (Preheader == LoopBB)
5783 Preheader = *std::next(LoopBB->pred_begin());
5784 MachineFunction *MF = Preheader->getParent();
5785
5786 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5788 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5789 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5791 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5792 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5793 }
5794 }
5795 return nullptr;
5796}
5797
5799 MachineBasicBlock &PreHeader,
5800 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5801
5802 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5803
5804 // The loop set-up instruction should be in preheader
5805 for (auto &I : PreHeader.instrs())
5806 if (I.getOpcode() == LOOPi)
5807 return &I;
5808 return nullptr;
5809}
5810
5811// Return true if get the base operand, byte offset of an instruction and the
5812// memory width. Width is the size of memory that is being loaded/stored.
5814 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5815 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5816 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5817 return false;
5818
5819 // Handle only loads/stores with base register followed by immediate offset.
5820 if (!LdSt.getOperand(1).isImm() ||
5821 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5822 return false;
5823 if (!LdSt.getOperand(1).isImm() ||
5824 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5825 return false;
5826
5827 if (!LdSt.hasOneMemOperand())
5828 return false;
5829
5830 Width = (*LdSt.memoperands_begin())->getSize();
5831 Offset = LdSt.getOperand(1).getImm();
5832 BaseReg = &LdSt.getOperand(2);
5833 return true;
5834}
5835
5837 const MachineInstr &MIa, const MachineInstr &MIb) const {
5838 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5839 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5840
5843 return false;
5844
5845 // Retrieve the base register, offset from the base register and width. Width
5846 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5847 // base registers are identical, and the offset of a lower memory access +
5848 // the width doesn't overlap the offset of a higher memory access,
5849 // then the memory accesses are different.
5851 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5852 int64_t OffsetA = 0, OffsetB = 0;
5854 WidthB = LocationSize::precise(0);
5855 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5856 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5857 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5858 int LowOffset = std::min(OffsetA, OffsetB);
5859 int HighOffset = std::max(OffsetA, OffsetB);
5860 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5861 if (LowWidth.hasValue() &&
5862 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5863 return true;
5864 }
5865 }
5866 return false;
5867}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1297
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
LLVM_ABI APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1141
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
LLVM_ABI AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:214
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:132
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
void setOpcode(unsigned Op)
Definition: MCInst.h:201
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:238
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:240
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:581
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:567
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:270
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:86
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:101
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:105
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:92
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:948
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:409
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:642
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
mop_range operands()
Definition: MachineInstr.h:693
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:813
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:798
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
mop_range uses()
Returns all operands which may be register uses.
Definition: MachineInstr.h:731
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
LLVM_ABI void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:404
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:383
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:381
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
void promoteInstr32To64ForElimEXTSW(const Register &Reg, MachineRegisterInfo *MRI, unsigned BinOpDepth, LiveVariables *LV) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:733
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:616
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:727
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:213
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:146
MI-level patchpoint operands.
Definition: StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:105
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
LLVM_ABI void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:61
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:584
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:586
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
MI-level stackmap operands.
Definition: StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:750
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:953
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
template class LLVM_TEMPLATE_ABI opt< bool >
Definition: CommandLine.cpp:79
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:99
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:100
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:94
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:95
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:74
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:76
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:79
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:69
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:80
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:77
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:87
@ SOK_DMRSpill
Definition: PPCInstrInfo.h:85
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:78
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:71
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:72
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:83
@ SOK_SPESpill
Definition: PPCInstrInfo.h:86
@ SOK_CRSpill
Definition: PPCInstrInfo.h:73
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:70
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:88
@ SOK_DMRpSpill
Definition: PPCInstrInfo.h:84
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:56
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:46
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:36
uint64_t IsCommutative
Definition: PPCInstrInfo.h:44
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:42
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:54
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:39
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.