LLVM 22.0.0git
HexagonLoadStoreWidening.cpp
Go to the documentation of this file.
1//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// HexagonStoreWidening:
9// Replace sequences of "narrow" stores to adjacent memory locations with
10// a fewer "wide" stores that have the same effect.
11// For example, replace:
12// S4_storeirb_io %100, 0, 0 ; store-immediate-byte
13// S4_storeirb_io %100, 1, 0 ; store-immediate-byte
14// with
15// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
16// The above is the general idea. The actual cases handled by the code
17// may be a bit more complex.
18// The purpose of this pass is to reduce the number of outstanding stores,
19// or as one could say, "reduce store queue pressure". Also, wide stores
20// mean fewer stores, and since there are only two memory instructions allowed
21// per packet, it also means fewer packets, and ultimately fewer cycles.
22//
23// HexagonLoadWidening does the same thing as HexagonStoreWidening but
24// for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
25// For example:
26// Replace
27// %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
28// %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
29// with
30// %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
31// %2:intregs = COPY %4.isub_lo:doubleregs
32// %3:intregs = COPY %4.isub_hi:doubleregs
33//
34// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
35// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
36
37//===---------------------------------------------------------------------===//
38
39#include "Hexagon.h"
40#include "HexagonInstrInfo.h"
41#include "HexagonRegisterInfo.h"
42#include "HexagonSubtarget.h"
54#include "llvm/IR/DebugLoc.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
58#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <cstdint>
65#include <iterator>
66
67using namespace llvm;
68
69#define DEBUG_TYPE "hexagon-load-store-widening"
70
72 "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
73 cl::desc("Limit block size to analyze in load/store widening pass"));
74
75namespace {
76
77struct HexagonLoadStoreWidening {
78 enum WideningMode { Store, Load };
79 const HexagonInstrInfo *TII;
82 AliasAnalysis *AA;
84
85public:
86 HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87 const HexagonRegisterInfo *TRI,
89 MachineFunction *MF, bool StoreMode)
90 : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91 Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92 HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93
94 bool run();
95
96private:
97 const bool Mode;
98 const unsigned MaxWideSize = 8;
99 const HexagonInstrInfo *HII = nullptr;
100
101 using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102 using InstrGroup = SmallVector<MachineInstr *, 8>;
103 using InstrGroupList = SmallVector<InstrGroup, 8>;
104
105 InstrSet ProcessedInsts;
106
107 unsigned getBaseAddressRegister(const MachineInstr *MI);
108 int64_t getOffset(const MachineInstr *MI);
109 int64_t getPostIncrementValue(const MachineInstr *MI);
110 bool handledInstType(const MachineInstr *MI);
111
112 void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113 void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114 bool processBasicBlock(MachineBasicBlock &MBB);
115 bool processGroup(InstrGroup &Group);
116 bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118 bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120 bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121 bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122 bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123 bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124};
125
126struct HexagonStoreWidening : public MachineFunctionPass {
127 static char ID;
128
129 HexagonStoreWidening() : MachineFunctionPass(ID) {}
130
131 StringRef getPassName() const override { return "Hexagon Store Widening"; }
132
133 void getAnalysisUsage(AnalysisUsage &AU) const override {
137 }
138
139 bool runOnMachineFunction(MachineFunction &MFn) override {
140 if (skipFunction(MFn.getFunction()))
141 return false;
142
143 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
144 const HexagonInstrInfo *TII = ST.getInstrInfo();
145 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
147 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
148
149 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
150 }
151};
152
153struct HexagonLoadWidening : public MachineFunctionPass {
154 static char ID;
155
156 HexagonLoadWidening() : MachineFunctionPass(ID) {}
157
158 StringRef getPassName() const override { return "Hexagon Load Widening"; }
159
160 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 }
165
166 bool runOnMachineFunction(MachineFunction &MFn) override {
167 if (skipFunction(MFn.getFunction()))
168 return false;
169
170 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
171 const HexagonInstrInfo *TII = ST.getInstrInfo();
172 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
174 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
175 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
176 }
177};
178
179char HexagonStoreWidening::ID = 0;
180char HexagonLoadWidening::ID = 0;
181
182} // end anonymous namespace
183
184INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
185 "Hexagon Store Widening", false, false)
187INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
188 "Hexagon Store Widening", false, false)
189
190INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
191 "Hexagon Load Widening", false, false)
193INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
194 "Hexagon Load Widening", false, false)
195
197 assert(!MI->memoperands_empty() && "Expecting memory operands");
198 return **MI->memoperands_begin();
199}
200
201unsigned
202HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
203 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
204 unsigned Base, Offset;
205 HII->getBaseAndOffsetPosition(*MI, Base, Offset);
206 const MachineOperand &MO = MI->getOperand(Base);
207 assert(MO.isReg() && "Expecting register operand");
208 return MO.getReg();
209}
210
211int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
212 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
213
214 // On Hexagon, post-incs always have an offset of 0
215 // There is no Offset operand to post-incs
216 if (HII->isPostIncrement(*MI))
217 return 0;
218
219 unsigned Base, Offset;
220
221 HII->getBaseAndOffsetPosition(*MI, Base, Offset);
222 const MachineOperand &MO = MI->getOperand(Offset);
223 switch (MO.getType()) {
225 return MO.getImm();
227 return MO.getOffset();
228 default:
229 break;
230 }
231 llvm_unreachable("Expecting an immediate or global operand");
232}
233
234inline int64_t
235HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
236 unsigned Base, PostIncIdx;
237 HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx);
238 const MachineOperand &MO = MI->getOperand(PostIncIdx);
239 return MO.getImm();
240}
241
242// Filtering function: any loads/stores whose opcodes are not "approved" of by
243// this function will not be subjected to widening.
244inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
245 unsigned Opc = MI->getOpcode();
246 if (Mode == WideningMode::Store) {
247 switch (Opc) {
248 case Hexagon::S4_storeirb_io:
249 case Hexagon::S4_storeirh_io:
250 case Hexagon::S4_storeiri_io:
251 case Hexagon::S2_storeri_io:
252 // Base address must be a register. (Implement FI later.)
253 return MI->getOperand(0).isReg();
254 case Hexagon::S2_storeri_pi:
255 return MI->getOperand(1).isReg();
256 }
257 } else {
258 // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
259 // loads. So we only widen 32 bit loads as we don't need to select the
260 // right bits with AND & SHIFT ops.
261 switch (Opc) {
262 case Hexagon::L2_loadri_io:
263 // Base address must be a register and offset must be immediate.
264 return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
265 MI->getOperand(2).isImm();
266 case Hexagon::L2_loadri_pi:
267 return !MI->memoperands_empty() && MI->getOperand(2).isReg();
268 }
269 }
270 return false;
271}
272
274 DenseSet<Register> &RegDefs,
275 DenseSet<Register> &RegUses) {
276 for (const auto &Op : MI->operands()) {
277 if (!Op.isReg())
278 continue;
279 if (Op.isDef())
280 RegDefs.insert(Op.getReg());
281 if (Op.readsReg())
282 RegUses.insert(Op.getReg());
283 }
284}
285
286bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
287 const MachineInstr *B) {
288 DenseSet<Register> ARegDefs;
289 DenseSet<Register> ARegUses;
290 addDefsUsesToList(A, ARegDefs, ARegUses);
291 if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
292 (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
293 return false;
294 for (const auto &BOp : B->operands()) {
295 if (!BOp.isReg())
296 continue;
297 if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
298 return false;
299 if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
300 return false;
301 }
302 return true;
303}
304
305// Inspect a machine basic block, and generate groups out of loads/stores
306// encountered in the block.
307//
308// A load/store group is a group of loads or stores that use the same base
309// register, and which can be reordered within that group without altering the
310// semantics of the program. A single group could be widened as
311// a whole, if there existed a single load/store instruction with the same
312// semantics as the entire group. In many cases, a single group may need more
313// than one wide load or store.
314void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
315 InstrGroupList &StoreGroups) {
316 // Traverse all instructions and if we encounter
317 // a load/store, then try to create a group starting at that instruction
318 // i.e. a sequence of independent loads/stores that can be widened.
319 for (auto I = MBB.begin(); I != MBB.end(); ++I) {
320 MachineInstr *MI = &(*I);
321 if (!handledInstType(MI))
322 continue;
323 if (ProcessedInsts.count(MI))
324 continue;
325
326 // Found a store. Try to create a store group.
327 InstrGroup G;
328 createGroup(MI, G);
329 if (G.size() > 1)
330 StoreGroups.push_back(G);
331 }
332}
333
334// Create a single load/store group. The insts need to be independent between
335// themselves, and also there cannot be other instructions between them
336// that could read or modify storage being read from or stored into.
337void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
338 InstrGroup &Group) {
339 assert(handledInstType(BaseInst) && "Unexpected instruction");
340 unsigned BaseReg = getBaseAddressRegister(BaseInst);
341 InstrGroup Other;
342
343 Group.push_back(BaseInst);
344 LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
345 auto End = BaseInst->getParent()->end();
346 auto I = BaseInst->getIterator();
347
348 while (true) {
349 I = std::next(I);
350 if (I == End)
351 break;
352 MachineInstr *MI = &(*I);
353
354 // Assume calls are aliased to everything.
355 if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
356 MI->hasOrderedMemoryRef())
357 return;
358
359 if (!handledInstType(MI)) {
360 if (MI->mayLoadOrStore())
361 Other.push_back(MI);
362 continue;
363 }
364
365 // We have a handledInstType instruction
366 // If this load/store instruction is aliased with anything already in the
367 // group, terminate the group now.
368 for (auto GI : Group)
369 if (GI->mayAlias(AA, *MI, true))
370 return;
371 if (Mode == WideningMode::Load) {
372 // Check if current load MI can be moved to the first load instruction
373 // in Group. If any load instruction aliases with memory instructions in
374 // Other, terminate the group.
375 for (auto MemI : Other)
376 if (!canSwapInstructions(MI, MemI))
377 return;
378 } else {
379 // Check if store instructions in the group can be moved to current
380 // store MI. If any store instruction aliases with memory instructions
381 // in Other, terminate the group.
382 for (auto MemI : Other) {
383 if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
384 0)
385 continue;
386 for (auto GI : Group)
387 if (!canSwapInstructions(MemI, GI))
388 return;
389 }
390 }
391
392 unsigned BR = getBaseAddressRegister(MI);
393 if (BR == BaseReg) {
394 LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
395 Group.push_back(MI);
396 ProcessedInsts.insert(MI);
397 }
398 } // while
399}
400
401// Check if load/store instructions S1 and S2 are adjacent. More precisely,
402// S2 has to access memory immediately following that accessed by S1.
403bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
404 const MachineInstr *S2) {
405 if (!handledInstType(S1) || !handledInstType(S2))
406 return false;
407
408 const MachineMemOperand &S1MO = getMemTarget(S1);
409
410 // Currently only handling immediate stores.
411 int Off1 = getOffset(S1);
412 int Off2 = getOffset(S2);
413
414 return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
415 : int(Off1 + S1MO.getSize().getValue()) == Off2;
416}
417
418/// Given a sequence of adjacent loads/stores, and a maximum size of a single
419/// wide inst, pick a group of insts that can be replaced by a single load/store
420/// of size not exceeding MaxSize. The selected sequence will be recorded
421/// in OG ("old group" of instructions).
422/// OG should be empty on entry, and should be left empty if the function
423/// fails.
424bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
425 InstrGroup::iterator End,
426 InstrGroup &OG, unsigned &TotalSize,
427 unsigned MaxSize) {
428 assert(Begin != End && "No instructions to analyze");
429 assert(OG.empty() && "Old group not empty on entry");
430
431 if (std::distance(Begin, End) <= 1)
432 return false;
433
434 MachineInstr *FirstMI = *Begin;
435 assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
436 const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
437 if (!FirstMMO.getType().isValid())
438 return false;
439
440 unsigned Alignment = FirstMMO.getAlign().value();
441 unsigned SizeAccum = FirstMMO.getSize().getValue();
442 unsigned FirstOffset = getOffset(FirstMI);
443
444 // The initial value of SizeAccum should always be a power of 2.
445 assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
446
447 // If the size of the first store equals to or exceeds the limit, do nothing.
448 if (SizeAccum >= MaxSize)
449 return false;
450
451 // If the size of the first load/store is greater than or equal to the address
452 // stored to, then the inst cannot be made any wider.
453 if (SizeAccum >= Alignment) {
455 dbgs() << "Size of load/store greater than equal to its alignment\n");
456 return false;
457 }
458
459 // The offset of a load/store will put restrictions on how wide the inst can
460 // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
461 // bits be 0. If the first inst already exhausts the offset limits, quit.
462 // Test this by checking if the next wider size would exceed the limit.
463 // For post-increment instructions, the increment amount needs to follow the
464 // same rule.
465 unsigned OffsetOrIncVal = 0;
466 if (HII->isPostIncrement(*FirstMI))
467 OffsetOrIncVal = getPostIncrementValue(FirstMI);
468 else
469 OffsetOrIncVal = FirstOffset;
470 if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
471 LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
472 << " value: " << getPostIncrementValue(FirstMI)
473 << " is invalid in the widened version\n");
474 return false;
475 }
476
477 OG.push_back(FirstMI);
478 MachineInstr *S1 = FirstMI;
479
480 // Pow2Num will be the largest number of elements in OG such that the sum
481 // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
482 unsigned Pow2Num = 1;
483 unsigned Pow2Size = SizeAccum;
484 bool HavePostInc = HII->isPostIncrement(*S1);
485
486 // Be greedy: keep accumulating insts as long as they are to adjacent
487 // memory locations, and as long as the total number of bytes stored
488 // does not exceed the limit (MaxSize).
489 // Keep track of when the total size covered is a power of 2, since
490 // this is a size a single load/store can cover.
491 for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
492 MachineInstr *S2 = *I;
493 // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
494 // any other store to fill the "hole".
495 if (!areAdjacent(S1, S2))
496 break;
497
498 // Cannot widen two post increments, need to return two registers
499 // with incremented values
500 if (HavePostInc && HII->isPostIncrement(*S2))
501 break;
502
503 unsigned S2Size = getMemTarget(S2).getSize().getValue();
504 if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
505 break;
506
507 OG.push_back(S2);
508 SizeAccum += S2Size;
509 if (isPowerOf2_32(SizeAccum)) {
510 Pow2Num = OG.size();
511 Pow2Size = SizeAccum;
512 }
513 if ((2 * Pow2Size - 1) & FirstOffset)
514 break;
515
516 S1 = S2;
517 }
518
519 // The insts don't add up to anything that can be widened. Clean up.
520 if (Pow2Num <= 1) {
521 OG.clear();
522 return false;
523 }
524
525 // Only leave the loads/stores being widened.
526 OG.resize(Pow2Num);
527 TotalSize = Pow2Size;
528 return true;
529}
530
531/// Given an "old group" OG of insts, create a "new group" NG of instructions
532/// to replace them.
533bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
534 unsigned TotalSize) {
535 if (Mode == WideningMode::Store) {
536 return createWideStores(OG, NG, TotalSize);
537 }
538 return createWideLoads(OG, NG, TotalSize);
539}
540
541/// Given an "old group" OG of stores, create a "new group" NG of instructions
542/// to replace them. Ideally, NG would only have a single instruction in it,
543/// but that may only be possible for store-immediate.
544bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
545 unsigned TotalSize) {
546 // XXX Current limitations:
547 // - only handle a TotalSize of up to 8
548
549 LLVM_DEBUG(dbgs() << "Creating wide stores\n");
550 if (TotalSize > MaxWideSize)
551 return false;
552
553 uint64_t Acc = 0; // Value accumulator.
554 unsigned Shift = 0;
555 bool HaveImm = false;
556 bool HaveReg = false;
557
558 for (MachineInstr *MI : OG) {
559 const MachineMemOperand &MMO = getMemTarget(MI);
560 MachineOperand &SO = HII->isPostIncrement(*MI)
561 ? MI->getOperand(3)
562 : MI->getOperand(2); // Source.
563 unsigned NBits;
565 uint64_t Val;
566
567 switch (SO.getType()) {
569 LLVM_DEBUG(dbgs() << "Have store immediate\n");
570 HaveImm = true;
571
572 NBits = MMO.getSizeInBits().toRaw();
573 Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
574 Val = (SO.getImm() & Mask) << Shift;
575 Acc |= Val;
576 Shift += NBits;
577 break;
579 HaveReg = true;
580 break;
581 default:
582 LLVM_DEBUG(dbgs() << "Unhandled store\n");
583 return false;
584 }
585 }
586
587 if (HaveImm && HaveReg) {
588 LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
589 return false;
590 }
591
592 MachineInstr *FirstSt = OG.front();
593 DebugLoc DL = OG.back()->getDebugLoc();
594 const MachineMemOperand &OldM = getMemTarget(FirstSt);
595 MachineMemOperand *NewM =
596 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
597 TotalSize, OldM.getAlign(), OldM.getAAInfo());
598 MachineInstr *StI;
599 MachineOperand &MR =
600 (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
601 : FirstSt->getOperand(0));
602 auto SecondSt = OG.back();
603 if (HaveReg) {
604 MachineOperand FReg =
605 (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
606 : FirstSt->getOperand(2));
607 // Post increments appear first in the sorted group.
608 // Cannot have a post increment for the second instruction
609 assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
610 MachineOperand SReg = SecondSt->getOperand(2);
611 assert(FReg.isReg() && SReg.isReg() &&
612 "Cannot merge store register and store imm");
613 const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
614 Register VReg =
615 MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
616 MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
617 NG.push_back(CombI);
618
619 if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
620 const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
621 auto IncDestMO = FirstSt->getOperand(0);
622 auto IncMO = FirstSt->getOperand(2);
623 StI =
624 BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
625 } else {
626 const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
627 auto OffMO = FirstSt->getOperand(1);
628 StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
629 }
630 StI->addMemOperand(*MF, NewM);
631 NG.push_back(StI);
632 return true;
633 }
634
635 // Handle store immediates
636 // There are no post increment store immediates on Hexagon
637 assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
638 auto Off = FirstSt->getOperand(1).getImm();
639 if (TotalSize == 8) {
640 // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
641 uint64_t Mask = 0xFFFFFFFFU;
642 int LowerAcc = int(Mask & Acc);
643 int UpperAcc = Acc >> 32;
644 Register DReg =
645 MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
646 MachineInstr *CombI;
647 if (Acc != 0) {
648 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
649 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
650 Register VReg = MF->getRegInfo().createVirtualRegister(RC);
651 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
652 NG.push_back(TfrI);
653 const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
654 CombI = BuildMI(*MF, DL, CombD, DReg)
655 .addImm(UpperAcc)
656 .addReg(VReg, RegState::Kill);
657 }
658 // If immediates are 0, we do not need A2_tfrsi
659 else {
660 const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
661 CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
662 }
663 NG.push_back(CombI);
664 const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
665 StI =
666 BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
667 } else if (Acc < 0x10000) {
668 // Create mem[hw] = #Acc
669 unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io
670 : (TotalSize == 4) ? Hexagon::S4_storeiri_io
671 : 0;
672 assert(WOpc && "Unexpected size");
673
674 int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
675 const MCInstrDesc &StD = TII->get(WOpc);
676 StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
677 } else {
678 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
679 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
680 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
681 Register VReg = MF->getRegInfo().createVirtualRegister(RC);
682 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
683 NG.push_back(TfrI);
684
685 unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io
686 : (TotalSize == 4) ? Hexagon::S2_storeri_io
687 : 0;
688 assert(WOpc && "Unexpected size");
689
690 const MCInstrDesc &StD = TII->get(WOpc);
691 StI =
692 BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
693 }
694 StI->addMemOperand(*MF, NewM);
695 NG.push_back(StI);
696
697 return true;
698}
699
700/// Given an "old group" OG of loads, create a "new group" NG of instructions
701/// to replace them. Ideally, NG would only have a single instruction in it,
702/// but that may only be possible for double register loads.
703bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
704 unsigned TotalSize) {
705 LLVM_DEBUG(dbgs() << "Creating wide loads\n");
706 // XXX Current limitations:
707 // - only expect stores of immediate values in OG,
708 // - only handle a TotalSize of up to 8
709 if (TotalSize > MaxWideSize)
710 return false;
711 assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");
712
713 MachineInstr *FirstLd = OG.front();
714 const MachineMemOperand &OldM = getMemTarget(FirstLd);
715 MachineMemOperand *NewM =
716 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
717 TotalSize, OldM.getAlign(), OldM.getAAInfo());
718
719 MachineOperand &MR = FirstLd->getOperand(0);
720 MachineOperand &MRBase =
721 (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
722 : FirstLd->getOperand(1));
723 DebugLoc DL = OG.back()->getDebugLoc();
724
725 // Create the double register Load Instruction.
726 Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
727 MachineInstr *LdI;
728
729 // Post increments appear first in the sorted group
730 if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
731 auto IncDestMO = FirstLd->getOperand(1);
732 auto IncMO = FirstLd->getOperand(3);
733 LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
734 .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
735 .add(IncDestMO)
736 .add(MRBase)
737 .add(IncMO);
738 LdI->addMemOperand(*MF, NewM);
739 } else {
740 auto OffMO = FirstLd->getOperand(2);
741 LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
742 .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
743 .add(MRBase)
744 .add(OffMO);
745 LdI->addMemOperand(*MF, NewM);
746 }
747 NG.push_back(LdI);
748
749 auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
750 MachineInstr *DstReg) {
751 Register DestReg = DstReg->getOperand(0).getReg();
752 return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
753 .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
754 };
755
756 MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
757 MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
758 NG.push_back(LdI_lo);
759 NG.push_back(LdI_hi);
760
761 return true;
762}
763
764// Replace instructions from the old group OG with instructions from the
765// new group NG. Conceptually, remove all instructions in OG, and then
766// insert all instructions in NG, starting at where the first instruction
767// from OG was (in the order in which they appeared in the basic block).
768// (The ordering in OG does not have to match the order in the basic block.)
769bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
770 LLVM_DEBUG({
771 dbgs() << "Replacing:\n";
772 for (auto I : OG)
773 dbgs() << " " << *I;
774 dbgs() << "with\n";
775 for (auto I : NG)
776 dbgs() << " " << *I;
777 });
778
780 MachineBasicBlock::iterator InsertAt = MBB->end();
781
782 // Need to establish the insertion point.
783 // For loads the best one is right before the first load in the OG,
784 // but in the order in which the insts occur in the program list.
785 // For stores the best point is right after the last store in the OG.
786 // Since the ordering in OG does not correspond
787 // to the order in the program list, we need to do some work to find
788 // the insertion point.
789
790 // Create a set of all instructions in OG (for quick lookup).
791 InstrSet OldMemInsts(llvm::from_range, OG);
792
793 if (Mode == WideningMode::Load) {
794 // Find the first load instruction in the block that is present in OG.
795 for (auto &I : *MBB) {
796 if (OldMemInsts.count(&I)) {
797 InsertAt = I;
798 break;
799 }
800 }
801
802 assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");
803
804 for (auto *I : NG)
805 MBB->insert(InsertAt, I);
806 } else {
807 // Find the last store instruction in the block that is present in OG.
808 auto I = MBB->rbegin();
809 for (; I != MBB->rend(); ++I) {
810 if (OldMemInsts.count(&(*I))) {
811 InsertAt = (*I).getIterator();
812 break;
813 }
814 }
815
816 assert((I != MBB->rend()) && "Cannot locate any store from the group");
817
818 for (auto I = NG.rbegin(); I != NG.rend(); ++I)
819 MBB->insertAfter(InsertAt, *I);
820 }
821
822 for (auto *I : OG)
823 I->eraseFromParent();
824
825 return true;
826}
827
828// Break up the group into smaller groups, each of which can be replaced by
829// a single wide load/store. Widen each such smaller group and replace the old
830// instructions with the widened ones.
831bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
832 bool Changed = false;
833 InstrGroup::iterator I = Group.begin(), E = Group.end();
834 InstrGroup OG, NG; // Old and new groups.
835 unsigned CollectedSize;
836
837 while (I != E) {
838 OG.clear();
839 NG.clear();
840
841 bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) &&
842 createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG);
843 if (!Succ)
844 continue;
845
846 assert(OG.size() > 1 && "Created invalid group");
847 assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
848 I += OG.size() - 1;
849
850 Changed = true;
851 }
852
853 return Changed;
854}
855
856// Process a single basic block: create the load/store groups, and replace them
857// with the widened insts, if possible. Processing of each basic block
858// is independent from processing of any other basic block. This transfor-
859// mation could be stopped after having processed any basic block without
860// any ill effects (other than not having performed widening in the unpro-
861// cessed blocks). Also, the basic blocks can be processed in any order.
862bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
863 InstrGroupList SGs;
864 bool Changed = false;
865
866 // To prevent long compile time check for max BB size.
868 return false;
869
870 createGroups(MBB, SGs);
871
872 auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
873 return getOffset(A) < getOffset(B);
874 };
875 for (auto &G : SGs) {
876 assert(G.size() > 1 && "Group with fewer than 2 elements");
877 llvm::sort(G, Less);
878
879 Changed |= processGroup(G);
880 }
881
882 return Changed;
883}
884
885bool HexagonLoadStoreWidening::run() {
886 bool Changed = false;
887
888 for (auto &B : *MF)
889 Changed |= processBasicBlock(B);
890
891 return Changed;
892}
893
895 return new HexagonStoreWidening();
896}
897
899 return new HexagonLoadWidening();
900}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
constexpr LLT S1
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
hexagon widen Hexagon Store Widening
static cl::opt< unsigned > MaxMBBSizeForLoadStoreWidening("max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000), cl::desc("Limit block size to analyze in load/store widening pass"))
hexagon widen Hexagon Store false hexagon widen loads
hexagon widen stores
static void addDefsUsesToList(const MachineInstr *MI, DenseSet< Register > &RegDefs, DenseSet< Register > &RegUses)
hexagon widen Hexagon Store false hexagon widen Hexagon Load static false const MachineMemOperand & getMemTarget(const MachineInstr *MI)
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file defines the SmallPtrSet class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:188
constexpr bool isValid() const
Definition: LowLevelType.h:146
TypeSize getValue() const
uint64_t toRaw() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
reverse_iterator rend()
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_iterator rbegin()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:810
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
bool isKill() const
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_Register
Register operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void dump() const
Definition: Pass.cpp:146
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:169
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1157
@ Kill
The last use of a register.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr from_range_t from_range
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
FunctionPass * createHexagonLoadWidening()
unsigned getKillRegState(bool B)
FunctionPass * createHexagonStoreWidening()
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85