//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

43using namespace llvm;
44
45#define DEBUG_TYPE "thumb2-reduce-size"
46#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
47
48STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
49STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
50STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
51
52static cl::opt<int> ReduceLimit("t2-reduce-limit",
53 cl::init(-1), cl::Hidden);
54static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
55 cl::init(-1), cl::Hidden);
56static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
57 cl::init(-1), cl::Hidden);
58
namespace {

/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow.
///
/// Each entry describes one wide (32-bit) opcode and up to two narrow
/// (16-bit) replacements: a plain one and a two-address one. A zero narrow
/// opcode means that form is not available for this wide opcode.
struct ReduceEntry {
  uint16_t WideOpc;      // Wide opcode
  uint16_t NarrowOpc1;   // Narrow opcode to transform to
  uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
  uint8_t  Imm1Limit;    // Limit of immediate field (bits)
  uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
  unsigned LowRegs1 : 1; // Only possible if low-registers are used
  unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
  unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                         // 1 - No cc field.
                         // 2 - Always set CPSR.
  unsigned PredCC2  : 2; // Same encoding as PredCC1, for the 2addr form.
  unsigned PartFlag : 1; // 16-bit instruction does partial flag update
  unsigned Special  : 1; // Needs to be dealt with specially
  unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};

80 static const ReduceEntry ReduceTable[] = {
81 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
82 { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
83 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
84 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
85 { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
86 { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
87 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
88 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
89 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
90 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
91 //FIXME: Disable CMN, as CCodes are backwards from compare expectations
92 //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
93 { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
94 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
95 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
96 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
97 // FIXME: adr.n immediate offset must be multiple of 4.
98 //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
99 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
100 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
101 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
102 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
103 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
104 { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
105 // FIXME: Do we need the 16-bit 'S' variant?
106 { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
107 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
108 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
109 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
110 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
111 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
112 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
113 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
114 { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
115 { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
116 { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
117 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
118 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
119 { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
120 { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
121 { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
122 { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
123 { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
124 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
125 { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
126 { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
127
128 // FIXME: Clean this up after splitting each Thumb load / store opcode
129 // into multiple ones.
130 { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
131 { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
132 { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
133 { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
134 { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
135 { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
136 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
137 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
138 { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
139 { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
140 { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
141 { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
142 { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
143 { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
144 { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
145 { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
146
147 { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
148 { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
149 { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
150 // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
151 // tSTMIA_UPD is a change in semantics which can only be used if the base
152 // register is killed. This difference is correctly handled elsewhere.
153 { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
154 { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
155 { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
156 };
157
158 class Thumb2SizeReduce : public MachineFunctionPass {
159 public:
160 static char ID;
161
162 const Thumb2InstrInfo *TII;
163 const ARMSubtarget *STI;
164
165 Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
166
167 bool runOnMachineFunction(MachineFunction &MF) override;
168
170 return MachineFunctionProperties().setNoVRegs();
171 }
172
173 StringRef getPassName() const override {
175 }
176
177 private:
178 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
179 DenseMap<unsigned, unsigned> ReduceOpcodeMap;
180
181 bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
182
183 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
184 bool is2Addr, ARMCC::CondCodes Pred,
185 bool LiveCPSR, bool &HasCC, bool &CCDead);
186
187 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
188 const ReduceEntry &Entry);
189
190 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
191 const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
192
193 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
194 /// instruction.
195 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
196 const ReduceEntry &Entry, bool LiveCPSR,
197 bool IsSelfLoop);
198
199 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
200 /// non-two-address instruction.
201 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
202 const ReduceEntry &Entry, bool LiveCPSR,
203 bool IsSelfLoop);
204
205 /// ReduceMI - Attempt to reduce MI, return true on success.
206 bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
207 bool IsSelfLoop, bool SkipPrologueEpilogue);
208
209 /// ReduceMBB - Reduce width of instructions in the specified basic block.
210 bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
211
212 bool OptimizeSize;
213 bool MinimizeSize;
214
215 // Last instruction to define CPSR in the current block.
216 MachineInstr *CPSRDef;
217 // Was CPSR last defined by a high latency instruction?
218 // When CPSRDef is null, this refers to CPSR defs in predecessors.
219 bool HighLatencyCPSR;
220
221 struct MBBInfo {
222 // The flags leaving this block have high latency.
223 bool HighLatencyCPSR = false;
224 // Has this block been visited yet?
225 bool Visited = false;
226
227 MBBInfo() = default;
228 };
229
230 SmallVector<MBBInfo, 8> BlockInfo;
231
232 std::function<bool(const Function &)> PredicateFtor;
233 };
234
235 char Thumb2SizeReduce::ID = 0;
236
} // end anonymous namespace

240 false)
241
242Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
243 : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
244 OptimizeSize = MinimizeSize = false;
245 for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
246 unsigned FromOpc = ReduceTable[i].WideOpc;
247 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
248 llvm_unreachable("Duplicated entries?");
249 }
250}
251
252static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
253 return is_contained(MCID.implicit_defs(), ARM::CPSR);
254}
255
256// Check for a likely high-latency flag def.
258 switch(Def->getOpcode()) {
259 case ARM::FMSTAT:
260 case ARM::tMUL:
261 return true;
262 }
263 return false;
264}
265
266/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
267/// the 's' 16-bit instruction partially update CPSR. Abort the
268/// transformation to avoid adding false dependency on last CPSR setting
269/// instruction which hurts the ability for out-of-order execution engine
270/// to do register renaming magic.
271/// This function checks if there is a read-of-write dependency between the
272/// last instruction that defines the CPSR and the current instruction. If there
273/// is, then there is no harm done since the instruction cannot be retired
274/// before the CPSR setting instruction anyway.
275/// Note, we are not doing full dependency analysis here for the sake of compile
276/// time. We're not looking for cases like:
277/// r0 = muls ...
278/// r1 = add.w r0, ...
279/// ...
280/// = mul.w r1
281/// In this case it would have been ok to narrow the mul.w to muls since there
282/// are indirect RAW dependency between the muls and the mul.w
283bool
284Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
285 // Disable the check for -Oz (aka OptimizeForSizeHarder).
286 if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
287 return false;
288
289 if (!CPSRDef)
290 // If this BB loops back to itself, conservatively avoid narrowing the
291 // first instruction that does partial flag update.
292 return HighLatencyCPSR || FirstInSelfLoop;
293
295 for (const MachineOperand &MO : CPSRDef->operands()) {
296 if (!MO.isReg() || MO.isUndef() || MO.isUse())
297 continue;
298 Register Reg = MO.getReg();
299 if (Reg == 0 || Reg == ARM::CPSR)
300 continue;
301 Defs.insert(Reg);
302 }
303
304 for (const MachineOperand &MO : Use->operands()) {
305 if (!MO.isReg() || MO.isUndef() || MO.isDef())
306 continue;
307 Register Reg = MO.getReg();
308 if (Defs.count(Reg))
309 return false;
310 }
311
312 // If the current CPSR has high latency, try to avoid the false dependency.
313 if (HighLatencyCPSR)
314 return true;
315
316 // tMOVi8 usually doesn't start long dependency chains, and there are a lot
317 // of them, so always shrink them when CPSR doesn't have high latency.
318 if (Use->getOpcode() == ARM::t2MOVi ||
319 Use->getOpcode() == ARM::t2MOVi16)
320 return false;
321
322 // No read-after-write dependency. The narrowing will add false dependency.
323 return true;
324}
325
326bool
327Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
328 bool is2Addr, ARMCC::CondCodes Pred,
329 bool LiveCPSR, bool &HasCC, bool &CCDead) {
330 if ((is2Addr && Entry.PredCC2 == 0) ||
331 (!is2Addr && Entry.PredCC1 == 0)) {
332 if (Pred == ARMCC::AL) {
333 // Not predicated, must set CPSR.
334 if (!HasCC) {
335 // Original instruction was not setting CPSR, but CPSR is not
336 // currently live anyway. It's ok to set it. The CPSR def is
337 // dead though.
338 if (!LiveCPSR) {
339 HasCC = true;
340 CCDead = true;
341 return true;
342 }
343 return false;
344 }
345 } else {
346 // Predicated, must not set CPSR.
347 if (HasCC)
348 return false;
349 }
350 } else if ((is2Addr && Entry.PredCC2 == 2) ||
351 (!is2Addr && Entry.PredCC1 == 2)) {
352 /// Old opcode has an optional def of CPSR.
353 if (HasCC)
354 return true;
355 // If old opcode does not implicitly define CPSR, then it's not ok since
356 // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
357 if (!HasImplicitCPSRDef(MI->getDesc()))
358 return false;
359 HasCC = true;
360 } else {
361 // 16-bit instruction does not set CPSR.
362 if (HasCC)
363 return false;
364 }
365
366 return true;
367}
368
370 unsigned Opc = MI->getOpcode();
371 bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
372 bool isLROk = (Opc == ARM::t2STMDB_UPD);
373 bool isSPOk = isPCOk || isLROk;
374 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
375 const MachineOperand &MO = MI->getOperand(i);
376 if (!MO.isReg() || MO.isImplicit())
377 continue;
378 Register Reg = MO.getReg();
379 if (Reg == 0 || Reg == ARM::CPSR)
380 continue;
381 if (isPCOk && Reg == ARM::PC)
382 continue;
383 if (isLROk && Reg == ARM::LR)
384 continue;
385 if (Reg == ARM::SP) {
386 if (isSPOk)
387 continue;
388 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
389 // Special case for these ldr / str with sp as base register.
390 continue;
391 }
392 if (!isARMLowRegister(Reg))
393 return false;
394 }
395 return true;
396}
397
398bool
399Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
400 const ReduceEntry &Entry) {
401 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
402 return false;
403
404 unsigned Scale = 1;
405 bool HasImmOffset = false;
406 bool HasShift = false;
407 bool HasOffReg = true;
408 bool isLdStMul = false;
409 unsigned Opc = Entry.NarrowOpc1;
410 unsigned OpNum = 3; // First 'rest' of operands.
411 uint8_t ImmLimit = Entry.Imm1Limit;
412
413 switch (Entry.WideOpc) {
414 default:
415 llvm_unreachable("Unexpected Thumb2 load / store opcode!");
416 case ARM::t2LDRi12:
417 case ARM::t2STRi12:
418 if (MI->getOperand(1).getReg() == ARM::SP) {
419 Opc = Entry.NarrowOpc2;
420 ImmLimit = Entry.Imm2Limit;
421 }
422
423 Scale = 4;
424 HasImmOffset = true;
425 HasOffReg = false;
426 break;
427 case ARM::t2LDRBi12:
428 case ARM::t2STRBi12:
429 HasImmOffset = true;
430 HasOffReg = false;
431 break;
432 case ARM::t2LDRHi12:
433 case ARM::t2STRHi12:
434 Scale = 2;
435 HasImmOffset = true;
436 HasOffReg = false;
437 break;
438 case ARM::t2LDRs:
439 case ARM::t2LDRBs:
440 case ARM::t2LDRHs:
441 case ARM::t2LDRSBs:
442 case ARM::t2LDRSHs:
443 case ARM::t2STRs:
444 case ARM::t2STRBs:
445 case ARM::t2STRHs:
446 HasShift = true;
447 OpNum = 4;
448 break;
449 case ARM::t2LDR_POST:
450 case ARM::t2STR_POST: {
451 if (!MinimizeSize)
452 return false;
453
454 if (!MI->hasOneMemOperand() ||
455 (*MI->memoperands_begin())->getAlign() < Align(4))
456 return false;
457
458 // We're creating a completely different type of load/store - LDM from LDR.
459 // For this reason we can't reuse the logic at the end of this function; we
460 // have to implement the MI building here.
461 bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
462 Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
463 Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
464 unsigned Offset = MI->getOperand(3).getImm();
465 unsigned PredImm = MI->getOperand(4).getImm();
466 Register PredReg = MI->getOperand(5).getReg();
469
470 if (Offset != 4)
471 return false;
472
473 // Add the 16-bit load / store instruction.
474 DebugLoc dl = MI->getDebugLoc();
475 auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
477 .addReg(Rn)
478 .addImm(PredImm)
479 .addReg(PredReg)
480 .addReg(Rt, IsStore ? 0 : RegState::Define);
481
482 // Transfer memoperands.
483 MIB.setMemRefs(MI->memoperands());
484
485 // Transfer MI flags.
486 MIB.setMIFlags(MI->getFlags());
487
488 // Kill the old instruction.
489 MI->eraseFromBundle();
490 ++NumLdSts;
491 return true;
492 }
493 case ARM::t2LDMIA: {
494 Register BaseReg = MI->getOperand(0).getReg();
495 assert(isARMLowRegister(BaseReg));
496
497 // For the non-writeback version (this one), the base register must be
498 // one of the registers being loaded.
499 bool isOK = false;
500 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
501 if (MO.getReg() == BaseReg) {
502 isOK = true;
503 break;
504 }
505 }
506
507 if (!isOK)
508 return false;
509
510 OpNum = 0;
511 isLdStMul = true;
512 break;
513 }
514 case ARM::t2STMIA: {
515 // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
516 // if the base register is killed, as then it doesn't matter what its value
517 // is after the instruction.
518 if (!MI->getOperand(0).isKill())
519 return false;
520
521 // If the base register is in the register list and isn't the lowest
522 // numbered register (i.e. it's in operand 4 onwards) then with writeback
523 // the stored value is unknown, so we can't convert to tSTMIA_UPD.
524 Register BaseReg = MI->getOperand(0).getReg();
525 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
526 if (MO.getReg() == BaseReg)
527 return false;
528
529 break;
530 }
531 case ARM::t2LDMIA_RET: {
532 Register BaseReg = MI->getOperand(1).getReg();
533 if (BaseReg != ARM::SP)
534 return false;
535 Opc = Entry.NarrowOpc2; // tPOP_RET
536 OpNum = 2;
537 isLdStMul = true;
538 break;
539 }
540 case ARM::t2LDMIA_UPD:
541 case ARM::t2STMIA_UPD:
542 case ARM::t2STMDB_UPD: {
543 OpNum = 0;
544
545 Register BaseReg = MI->getOperand(1).getReg();
546 if (BaseReg == ARM::SP &&
547 (Entry.WideOpc == ARM::t2LDMIA_UPD ||
548 Entry.WideOpc == ARM::t2STMDB_UPD)) {
549 Opc = Entry.NarrowOpc2; // tPOP or tPUSH
550 OpNum = 2;
551 } else if (!isARMLowRegister(BaseReg) ||
552 (Entry.WideOpc != ARM::t2LDMIA_UPD &&
553 Entry.WideOpc != ARM::t2STMIA_UPD)) {
554 return false;
555 }
556
557 isLdStMul = true;
558 break;
559 }
560 }
561
562 unsigned OffsetReg = 0;
563 bool OffsetKill = false;
564 bool OffsetInternal = false;
565 if (HasShift) {
566 OffsetReg = MI->getOperand(2).getReg();
567 OffsetKill = MI->getOperand(2).isKill();
568 OffsetInternal = MI->getOperand(2).isInternalRead();
569
570 if (MI->getOperand(3).getImm())
571 // Thumb1 addressing mode doesn't support shift.
572 return false;
573 }
574
575 unsigned OffsetImm = 0;
576 if (HasImmOffset) {
577 OffsetImm = MI->getOperand(2).getImm();
578 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
579
580 if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
581 // Make sure the immediate field fits.
582 return false;
583 }
584
585 // Add the 16-bit load / store instruction.
586 DebugLoc dl = MI->getDebugLoc();
587 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
588
589 // tSTMIA_UPD takes a defining register operand. We've already checked that
590 // the register is killed, so mark it as dead here.
591 if (Entry.WideOpc == ARM::t2STMIA)
592 MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
593
594 if (!isLdStMul) {
595 MIB.add(MI->getOperand(0));
596 MIB.add(MI->getOperand(1));
597
598 if (HasImmOffset)
599 MIB.addImm(OffsetImm / Scale);
600
601 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
602
603 if (HasOffReg)
604 MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
605 getInternalReadRegState(OffsetInternal));
606 }
607
608 // Transfer the rest of operands.
609 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
610 MIB.add(MO);
611
612 // Transfer memoperands.
613 MIB.setMemRefs(MI->memoperands());
614
615 // Transfer MI flags.
616 MIB.setMIFlags(MI->getFlags());
617
618 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
619 << " to 16-bit: " << *MIB);
620
622 ++NumLdSts;
623 return true;
624}
625
626bool
627Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
628 const ReduceEntry &Entry,
629 bool LiveCPSR, bool IsSelfLoop) {
630 unsigned Opc = MI->getOpcode();
631 if (Opc == ARM::t2ADDri) {
632 // If the source register is SP, try to reduce to tADDrSPi, otherwise
633 // it's a normal reduce.
634 if (MI->getOperand(1).getReg() != ARM::SP) {
635 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
636 return true;
637 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
638 }
639 // Try to reduce to tADDrSPi.
640 unsigned Imm = MI->getOperand(2).getImm();
641 // The immediate must be in range, the destination register must be a low
642 // reg, the predicate must be "always" and the condition flags must not
643 // be being set.
644 if (Imm & 3 || Imm > 1020)
645 return false;
646 if (!isARMLowRegister(MI->getOperand(0).getReg()))
647 return false;
648 if (MI->getOperand(3).getImm() != ARMCC::AL)
649 return false;
650 const MCInstrDesc &MCID = MI->getDesc();
651 if (MCID.hasOptionalDef() &&
652 MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
653 return false;
654
656 BuildMI(MBB, MI, MI->getDebugLoc(),
657 TII->get(ARM::tADDrSPi))
658 .add(MI->getOperand(0))
659 .add(MI->getOperand(1))
660 .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
662
663 // Transfer MI flags.
664 MIB.setMIFlags(MI->getFlags());
665
666 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
667 << " to 16-bit: " << *MIB);
668
670 ++NumNarrows;
671 return true;
672 }
673
674 if (Entry.LowRegs1 && !VerifyLowRegs(MI))
675 return false;
676
677 if (MI->mayLoadOrStore())
678 return ReduceLoadStore(MBB, MI, Entry);
679
680 switch (Opc) {
681 default: break;
682 case ARM::t2ADDSri:
683 case ARM::t2ADDSrr: {
684 Register PredReg;
685 if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
686 switch (Opc) {
687 default: break;
688 case ARM::t2ADDSri:
689 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
690 return true;
691 [[fallthrough]];
692 case ARM::t2ADDSrr:
693 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
694 }
695 }
696 break;
697 }
698 case ARM::t2RSBri:
699 case ARM::t2RSBSri:
700 case ARM::t2SXTB:
701 case ARM::t2SXTH:
702 case ARM::t2UXTB:
703 case ARM::t2UXTH:
704 if (MI->getOperand(2).getImm() == 0)
705 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
706 break;
707 case ARM::t2MOVi16:
708 // Can convert only 'pure' immediate operands, not immediates obtained as
709 // globals' addresses.
710 if (MI->getOperand(1).isImm())
711 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
712 break;
713 case ARM::t2CMPrr: {
714 // Try to reduce to the lo-reg only version first. Why there are two
715 // versions of the instruction is a mystery.
716 // It would be nice to just have two entries in the main table that
717 // are prioritized, but the table assumes a unique entry for each
718 // source insn opcode. So for now, we hack a local entry record to use.
719 static const ReduceEntry NarrowEntry =
720 { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
721 if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
722 return true;
723 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
724 }
725 case ARM::t2TEQrr: {
726 Register PredReg;
727 // Can only convert to eors if we're not in an IT block.
728 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
729 break;
730 // TODO if Operand 0 is not killed but Operand 1 is, then we could write
731 // to Op1 instead.
732 if (MI->getOperand(0).isKill())
733 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
734 }
735 }
736 return false;
737}
738
739bool
740Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
741 const ReduceEntry &Entry,
742 bool LiveCPSR, bool IsSelfLoop) {
743 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
744 return false;
745
746 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
747 // Don't issue movs with shifter operand for some CPUs unless we
748 // are optimizing for size.
749 return false;
750
751 Register Reg0 = MI->getOperand(0).getReg();
752 Register Reg1 = MI->getOperand(1).getReg();
753 // t2MUL is "special". The tied source operand is second, not first.
754 if (MI->getOpcode() == ARM::t2MUL) {
755 // MULS can be slower than MUL
756 if (!MinimizeSize && STI->avoidMULS())
757 return false;
758 Register Reg2 = MI->getOperand(2).getReg();
759 // Early exit if the regs aren't all low regs.
760 if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
761 || !isARMLowRegister(Reg2))
762 return false;
763 if (Reg0 != Reg2) {
764 // If the other operand also isn't the same as the destination, we
765 // can't reduce.
766 if (Reg1 != Reg0)
767 return false;
768 // Try to commute the operands to make it a 2-address instruction.
769 MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
770 if (!CommutedMI)
771 return false;
772 }
773 } else if (Reg0 != Reg1) {
774 // Try to commute the operands to make it a 2-address instruction.
775 unsigned CommOpIdx1 = 1;
776 unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
777 if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
778 MI->getOperand(CommOpIdx2).getReg() != Reg0)
779 return false;
780 MachineInstr *CommutedMI =
781 TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
782 if (!CommutedMI)
783 return false;
784 }
785 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
786 return false;
787 if (Entry.Imm2Limit) {
788 unsigned Imm = MI->getOperand(2).getImm();
789 unsigned Limit = (1 << Entry.Imm2Limit) - 1;
790 if (Imm > Limit)
791 return false;
792 } else {
793 Register Reg2 = MI->getOperand(2).getReg();
794 if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
795 return false;
796 }
797
798 // Check if it's possible / necessary to transfer the predicate.
799 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
800 Register PredReg;
801 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
802 bool SkipPred = false;
803 if (Pred != ARMCC::AL) {
804 if (!NewMCID.isPredicable())
805 // Can't transfer predicate, fail.
806 return false;
807 } else {
808 SkipPred = !NewMCID.isPredicable();
809 }
810
811 bool HasCC = false;
812 bool CCDead = false;
813 const MCInstrDesc &MCID = MI->getDesc();
814 if (MCID.hasOptionalDef()) {
815 unsigned NumOps = MCID.getNumOperands();
816 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
817 if (HasCC && MI->getOperand(NumOps-1).isDead())
818 CCDead = true;
819 }
820 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
821 return false;
822
823 // Avoid adding a false dependency on partial flag update by some 16-bit
824 // instructions which has the 's' bit set.
825 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
826 canAddPseudoFlagDep(MI, IsSelfLoop))
827 return false;
828
829 // Add the 16-bit instruction.
830 DebugLoc dl = MI->getDebugLoc();
831 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
832 MIB.add(MI->getOperand(0));
833 if (NewMCID.hasOptionalDef())
834 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
835
836 // Transfer the rest of operands.
837 unsigned NumOps = MCID.getNumOperands();
838 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
839 if (i < NumOps && MCID.operands()[i].isOptionalDef())
840 continue;
841 if (SkipPred && MCID.operands()[i].isPredicate())
842 continue;
843 MIB.add(MI->getOperand(i));
844 }
845
846 // Transfer MI flags.
847 MIB.setMIFlags(MI->getFlags());
848
849 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
850 << " to 16-bit: " << *MIB);
851
853 ++Num2Addrs;
854 return true;
855}
856
857bool
858Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
859 const ReduceEntry &Entry,
860 bool LiveCPSR, bool IsSelfLoop) {
861 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
862 return false;
863
864 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
865 // Don't issue movs with shifter operand for some CPUs unless we
866 // are optimizing for size.
867 return false;
868
869 unsigned Limit = ~0U;
870 if (Entry.Imm1Limit)
871 Limit = (1 << Entry.Imm1Limit) - 1;
872
873 const MCInstrDesc &MCID = MI->getDesc();
874 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
875 if (MCID.operands()[i].isPredicate())
876 continue;
877 const MachineOperand &MO = MI->getOperand(i);
878 if (MO.isReg()) {
879 Register Reg = MO.getReg();
880 if (!Reg || Reg == ARM::CPSR)
881 continue;
882 if (Entry.LowRegs1 && !isARMLowRegister(Reg))
883 return false;
884 } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
885 if (((unsigned)MO.getImm()) > Limit)
886 return false;
887 }
888 }
889
890 // Check if it's possible / necessary to transfer the predicate.
891 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
892 Register PredReg;
893 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
894 bool SkipPred = false;
895 if (Pred != ARMCC::AL) {
896 if (!NewMCID.isPredicable())
897 // Can't transfer predicate, fail.
898 return false;
899 } else {
900 SkipPred = !NewMCID.isPredicable();
901 }
902
903 bool HasCC = false;
904 bool CCDead = false;
905 if (MCID.hasOptionalDef()) {
906 unsigned NumOps = MCID.getNumOperands();
907 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
908 if (HasCC && MI->getOperand(NumOps-1).isDead())
909 CCDead = true;
910 }
911 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
912 return false;
913
914 // Avoid adding a false dependency on partial flag update by some 16-bit
915 // instructions which has the 's' bit set.
916 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
917 canAddPseudoFlagDep(MI, IsSelfLoop))
918 return false;
919
920 // Add the 16-bit instruction.
921 DebugLoc dl = MI->getDebugLoc();
922 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
923
924 // TEQ is special in that it doesn't define a register but we're converting
925 // it into an EOR which does. So add the first operand as a def and then
926 // again as a use.
927 if (MCID.getOpcode() == ARM::t2TEQrr) {
928 MIB.add(MI->getOperand(0));
929 MIB->getOperand(0).setIsKill(false);
930 MIB->getOperand(0).setIsDef(true);
931 MIB->getOperand(0).setIsDead(true);
932
933 if (NewMCID.hasOptionalDef())
934 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
935 MIB.add(MI->getOperand(0));
936 } else {
937 MIB.add(MI->getOperand(0));
938 if (NewMCID.hasOptionalDef())
939 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
940 }
941
942 // Transfer the rest of operands.
943 unsigned NumOps = MCID.getNumOperands();
944 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
945 if (i < NumOps && MCID.operands()[i].isOptionalDef())
946 continue;
947 if ((MCID.getOpcode() == ARM::t2RSBSri ||
948 MCID.getOpcode() == ARM::t2RSBri ||
949 MCID.getOpcode() == ARM::t2SXTB ||
950 MCID.getOpcode() == ARM::t2SXTH ||
951 MCID.getOpcode() == ARM::t2UXTB ||
952 MCID.getOpcode() == ARM::t2UXTH) && i == 2)
953 // Skip the zero immediate operand, it's now implicit.
954 continue;
955 bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
956 if (SkipPred && isPred)
957 continue;
958 const MachineOperand &MO = MI->getOperand(i);
959 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
960 // Skip implicit def of CPSR. Either it's modeled as an optional
961 // def now or it's already an implicit def on the new instruction.
962 continue;
963 MIB.add(MO);
964 }
965 if (!MCID.isPredicable() && NewMCID.isPredicable())
966 MIB.add(predOps(ARMCC::AL));
967
968 // Transfer MI flags.
969 MIB.setMIFlags(MI->getFlags());
970
971 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
972 << " to 16-bit: " << *MIB);
973
975 ++NumNarrows;
976 return true;
977}
978
979static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
980 bool HasDef = false;
981 for (const MachineOperand &MO : MI.operands()) {
982 if (!MO.isReg() || MO.isUndef() || MO.isUse())
983 continue;
984 if (MO.getReg() != ARM::CPSR)
985 continue;
986
987 DefCPSR = true;
988 if (!MO.isDead())
989 HasDef = true;
990 }
991
992 return HasDef || LiveCPSR;
993}
994
995static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
996 for (const MachineOperand &MO : MI.operands()) {
997 if (!MO.isReg() || MO.isUndef() || MO.isDef())
998 continue;
999 if (MO.getReg() != ARM::CPSR)
1000 continue;
1001 assert(LiveCPSR && "CPSR liveness tracking is wrong!");
1002 if (MO.isKill()) {
1003 LiveCPSR = false;
1004 break;
1005 }
1006 }
1007
1008 return LiveCPSR;
1009}
1010
1011bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
1012 bool LiveCPSR, bool IsSelfLoop,
1013 bool SkipPrologueEpilogue) {
1014 unsigned Opcode = MI->getOpcode();
1015 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
1016 if (OPI == ReduceOpcodeMap.end())
1017 return false;
1018 if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
1019 MI->getFlag(MachineInstr::FrameDestroy)))
1020 return false;
1021 const ReduceEntry &Entry = ReduceTable[OPI->second];
1022
1023 // Don't attempt normal reductions on "special" cases for now.
1024 if (Entry.Special)
1025 return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
1026
1027 // Try to transform to a 16-bit two-address instruction.
1028 if (Entry.NarrowOpc2 &&
1029 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1030 return true;
1031
1032 // Try to transform to a 16-bit non-two-address instruction.
1033 if (Entry.NarrowOpc1 &&
1034 ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1035 return true;
1036
1037 return false;
1038}
1039
1040bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
1041 bool SkipPrologueEpilogue) {
1042 bool Modified = false;
1043
1044 // Yes, CPSR could be livein.
1045 bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
1046 MachineInstr *BundleMI = nullptr;
1047
1048 CPSRDef = nullptr;
1049 HighLatencyCPSR = false;
1050
1051 // Check predecessors for the latest CPSRDef.
1052 for (auto *Pred : MBB.predecessors()) {
1053 const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
1054 if (!PInfo.Visited) {
1055 // Since blocks are visited in RPO, this must be a back-edge.
1056 continue;
1057 }
1058 if (PInfo.HighLatencyCPSR) {
1059 HighLatencyCPSR = true;
1060 break;
1061 }
1062 }
1063
1064 // If this BB loops back to itself, conservatively avoid narrowing the
1065 // first instruction that does partial flag update.
1066 bool IsSelfLoop = MBB.isSuccessor(&MBB);
1069 for (; MII != E; MII = NextMII) {
1070 NextMII = std::next(MII);
1071
1072 MachineInstr *MI = &*MII;
1073 if (MI->isBundle()) {
1074 BundleMI = MI;
1075 continue;
1076 }
1077 if (MI->isDebugInstr())
1078 continue;
1079
1080 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
1081
1082 // Does NextMII belong to the same bundle as MI?
1083 bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
1084
1085 if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
1086 Modified = true;
1087 MachineBasicBlock::instr_iterator I = std::prev(NextMII);
1088 MI = &*I;
1089 // Removing and reinserting the first instruction in a bundle will break
1090 // up the bundle. Fix the bundling if it was broken.
1091 if (NextInSameBundle && !NextMII->isBundledWithPred())
1092 NextMII->bundleWithPred();
1093 }
1094
1095 if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
1096 // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
1097 // marker is only on the BUNDLE instruction. Process the BUNDLE
1098 // instruction as we finish with the bundled instruction to work around
1099 // the inconsistency.
1100 if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
1101 LiveCPSR = false;
1102 MachineOperand *MO =
1103 BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
1104 if (MO && !MO->isDead())
1105 LiveCPSR = true;
1106 MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
1107 if (MO && !MO->isKill())
1108 LiveCPSR = true;
1109 }
1110
1111 bool DefCPSR = false;
1112 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
1113 if (MI->isCall()) {
1114 // Calls don't really set CPSR.
1115 CPSRDef = nullptr;
1116 HighLatencyCPSR = false;
1117 IsSelfLoop = false;
1118 } else if (DefCPSR) {
1119 // This is the last CPSR defining instruction.
1120 CPSRDef = MI;
1121 HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
1122 IsSelfLoop = false;
1123 }
1124 }
1125
1126 MBBInfo &Info = BlockInfo[MBB.getNumber()];
1127 Info.HighLatencyCPSR = HighLatencyCPSR;
1128 Info.Visited = true;
1129 return Modified;
1130}
1131
1132bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
1133 if (PredicateFtor && !PredicateFtor(MF.getFunction()))
1134 return false;
1135
1136 STI = &MF.getSubtarget<ARMSubtarget>();
1137 if (STI->isThumb1Only() || STI->prefers32BitThumb())
1138 return false;
1139
1140 TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
1141
1142 // Optimizing / minimizing size? Minimizing size implies optimizing for size.
1143 OptimizeSize = MF.getFunction().hasOptSize();
1144 MinimizeSize = STI->hasMinSize();
1145
1146 BlockInfo.clear();
1147 BlockInfo.resize(MF.getNumBlockIDs());
1148
1149 // Visit blocks in reverse post-order so LastCPSRDef is known for all
1150 // predecessors.
1152 bool Modified = false;
1153 bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1155 for (MachineBasicBlock *MBB : RPOT)
1156 Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
1157 return Modified;
1158}
1159
1160/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1161/// reduction pass.
1163 std::function<bool(const Function &)> Ftor) {
1164 return new Thumb2SizeReduce(std::move(Ftor));
1165}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Performs the initial survey of the specified function
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static cl::opt< int > ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimit2Addr("t2-reduce-limit2", cl::init(-1), cl::Hidden)
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID)
static bool isHighLatencyCPSR(MachineInstr *Def)
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR)
static bool VerifyLowRegs(MachineInstr *MI)
#define THUMB2_SIZE_REDUCE_NAME
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR)
#define DEBUG_TYPE
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:235
bool isThumb1Only() const
Definition: ARMSubtarget.h:375
bool hasMinSize() const
Definition: ARMSubtarget.h:374
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:165
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:681
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:652
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:238
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:240
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:266
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:581
bool isPredicable() const
Return true if this instruction has a predicate operand that controls execution.
Definition: MCInstrDesc.h:340
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:231
instr_iterator instr_begin()
instr_iterator erase_instr(MachineInstr *I)
Remove an instruction from the instruction list and delete it.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Instructions::iterator instr_iterator
instr_iterator instr_end()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
static const unsigned CommuteAnyOperandIndex
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Dead
Unused definition.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
unsigned getInternalReadRegState(bool B)
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
FunctionPass * createThumb2SizeReductionPass(std::function< bool(const Function &)> Ftor=nullptr)
createThumb2SizeReductionPass - Returns an instance of the Thumb2 size reduction pass.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:851
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39