LLVM 22.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28
30 for (MachineInstr *MI : ScheduledInstrs)
31 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
32}
33
34//===----------------------------------------------------------------------===//
35// ModuloScheduleExpander implementation
36//===----------------------------------------------------------------------===//
37
38/// Return the register values for the operands of a Phi instruction.
39/// This function assume the instruction is a Phi.
41 Register &InitVal, Register &LoopVal) {
42 assert(Phi.isPHI() && "Expecting a Phi.");
43
44 InitVal = Register();
45 LoopVal = Register();
46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47 if (Phi.getOperand(i + 1).getMBB() != Loop)
48 InitVal = Phi.getOperand(i).getReg();
49 else
50 LoopVal = Phi.getOperand(i).getReg();
51
52 assert(InitVal && LoopVal && "Unexpected Phi structure.");
53}
54
55/// Return the Phi register value that comes from the incoming block.
57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59 return Phi.getOperand(i).getReg();
60 return Register();
61}
62
63/// Return the Phi register value that comes the loop block.
65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67 return Phi.getOperand(i).getReg();
68 return Register();
69}
70
72 BB = Schedule.getLoop()->getTopBlock();
73 Preheader = *BB->pred_begin();
74 if (Preheader == BB)
75 Preheader = *std::next(BB->pred_begin());
76
77 // Iterate over the definitions in each instruction, and compute the
78 // stage difference for each use. Keep the maximum value.
79 for (MachineInstr *MI : Schedule.getInstructions()) {
80 int DefStage = Schedule.getStage(MI);
81 for (const MachineOperand &Op : MI->all_defs()) {
82 Register Reg = Op.getReg();
83 unsigned MaxDiff = 0;
84 bool PhiIsSwapped = false;
85 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
86 MachineInstr *UseMI = UseOp.getParent();
87 int UseStage = Schedule.getStage(UseMI);
88 unsigned Diff = 0;
89 if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
91 if (MI->isPHI()) {
92 if (isLoopCarried(*MI))
93 ++Diff;
94 else
95 PhiIsSwapped = true;
96 }
97 MaxDiff = std::max(Diff, MaxDiff);
98 }
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
100 }
101 }
102
103 generatePipelinedLoop();
104}
105
106void ModuloScheduleExpander::generatePipelinedLoop() {
108 assert(LoopInfo && "Must be able to analyze loop!");
109
110 // Create a new basic block for the kernel and add it to the CFG.
112
113 unsigned MaxStageCount = Schedule.getNumStages() - 1;
114
115 // Remember the registers that are used in different stages. The index is
116 // the iteration, or stage, that the instruction is scheduled in. This is
117 // a map between register names in the original block and the names created
118 // in each stage of the pipelined loop.
119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120
121 // The renaming destination by Phis for the registers across stages.
122 // This map is updated during Phis generation to point to the most recent
123 // renaming destination.
124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125
126 InstrMapTy InstrMap;
127
129
130 // Generate the prolog instructions that set up the pipeline.
131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132 MF.insert(BB->getIterator(), KernelBB);
133 LIS.insertMBBInMaps(KernelBB);
134
135 // Rearrange the instructions to generate the new, pipelined loop,
136 // and update register names as needed.
137 for (MachineInstr *CI : Schedule.getInstructions()) {
138 if (CI->isPHI())
139 continue;
140 unsigned StageNum = Schedule.getStage(CI);
141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143 KernelBB->push_back(NewMI);
144 LIS.InsertMachineInstrInMaps(*NewMI);
145 InstrMap[NewMI] = CI;
146 }
147
148 // Copy any terminator instructions to the new kernel, and update
149 // names as needed.
150 for (MachineInstr &MI : BB->terminators()) {
151 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
152 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
153 KernelBB->push_back(NewMI);
154 LIS.InsertMachineInstrInMaps(*NewMI);
155 InstrMap[NewMI] = &MI;
156 }
157
158 NewKernel = KernelBB;
159 KernelBB->transferSuccessors(BB);
160 KernelBB->replaceSuccessor(BB, KernelBB);
161
162 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
163 InstrMap, MaxStageCount, MaxStageCount, false);
164 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
165 InstrMap, MaxStageCount, MaxStageCount, false);
166
167 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
168
170 // Generate the epilog instructions to complete the pipeline.
171 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
172 PrologBBs);
173
174 // We need this step because the register allocation doesn't handle some
175 // situations well, so we insert copies to help out.
176 splitLifetimes(KernelBB, EpilogBBs);
177
178 // Remove dead instructions due to loop induction variables.
179 removeDeadInstructions(KernelBB, EpilogBBs);
180
181 // Add branches between prolog and epilog blocks.
182 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
183
184 delete[] VRMap;
185 delete[] VRMapPhi;
186}
187
189 // Remove the original loop since it's no longer referenced.
190 for (auto &I : *BB)
192 BB->clear();
193 BB->eraseFromParent();
194}
195
196/// Generate the pipeline prolog code.
197void ModuloScheduleExpander::generateProlog(unsigned LastStage,
198 MachineBasicBlock *KernelBB,
199 ValueMapTy *VRMap,
200 MBBVectorTy &PrologBBs) {
201 MachineBasicBlock *PredBB = Preheader;
202 InstrMapTy InstrMap;
203
204 // Generate a basic block for each stage, not including the last stage,
205 // which will be generated in the kernel. Each basic block may contain
206 // instructions from multiple stages/iterations.
207 for (unsigned i = 0; i < LastStage; ++i) {
208 // Create and insert the prolog basic block prior to the original loop
209 // basic block. The original loop is removed later.
211 PrologBBs.push_back(NewBB);
212 MF.insert(BB->getIterator(), NewBB);
213 NewBB->transferSuccessors(PredBB);
214 PredBB->addSuccessor(NewBB);
215 PredBB = NewBB;
216 LIS.insertMBBInMaps(NewBB);
217
218 // Generate instructions for each appropriate stage. Process instructions
219 // in original program order.
220 for (int StageNum = i; StageNum >= 0; --StageNum) {
222 BBE = BB->getFirstTerminator();
223 BBI != BBE; ++BBI) {
224 if (Schedule.getStage(&*BBI) == StageNum) {
225 if (BBI->isPHI())
226 continue;
227 MachineInstr *NewMI =
228 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
229 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
230 NewBB->push_back(NewMI);
231 LIS.InsertMachineInstrInMaps(*NewMI);
232 InstrMap[NewMI] = &*BBI;
233 }
234 }
235 }
236 rewritePhiValues(NewBB, i, VRMap, InstrMap);
237 LLVM_DEBUG({
238 dbgs() << "prolog:\n";
239 NewBB->dump();
240 });
241 }
242
243 PredBB->replaceSuccessor(BB, KernelBB);
244
245 // Check if we need to remove the branch from the preheader to the original
246 // loop, and replace it with a branch to the new loop.
247 unsigned numBranches = TII->removeBranch(*Preheader);
248 if (numBranches) {
250 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
251 }
252}
253
254/// Generate the pipeline epilog code. The epilog code finishes the iterations
255/// that were started in either the prolog or the kernel. We create a basic
256/// block for each stage that needs to complete.
257void ModuloScheduleExpander::generateEpilog(
258 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
259 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
260 MBBVectorTy &PrologBBs) {
261 // We need to change the branch from the kernel to the first epilog block, so
262 // this call to analyze branch uses the kernel rather than the original BB.
263 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
265 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
266 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
267 if (checkBranch)
268 return;
269
270 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
271 if (*LoopExitI == KernelBB)
272 ++LoopExitI;
273 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
274 MachineBasicBlock *LoopExitBB = *LoopExitI;
275
276 MachineBasicBlock *PredBB = KernelBB;
277 MachineBasicBlock *EpilogStart = LoopExitBB;
278 InstrMapTy InstrMap;
279
280 // Generate a basic block for each stage, not including the last stage,
281 // which was generated for the kernel. Each basic block may contain
282 // instructions from multiple stages/iterations.
283 int EpilogStage = LastStage + 1;
284 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
286 EpilogBBs.push_back(NewBB);
287 MF.insert(BB->getIterator(), NewBB);
288
289 PredBB->replaceSuccessor(LoopExitBB, NewBB);
290 NewBB->addSuccessor(LoopExitBB);
291 LIS.insertMBBInMaps(NewBB);
292
293 if (EpilogStart == LoopExitBB)
294 EpilogStart = NewBB;
295
296 // Add instructions to the epilog depending on the current block.
297 // Process instructions in original program order.
298 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
299 for (auto &BBI : *BB) {
300 if (BBI.isPHI())
301 continue;
302 MachineInstr *In = &BBI;
303 if ((unsigned)Schedule.getStage(In) == StageNum) {
304 // Instructions with memoperands in the epilog are updated with
305 // conservative values.
306 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
307 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
308 NewBB->push_back(NewMI);
309 LIS.InsertMachineInstrInMaps(*NewMI);
310 InstrMap[NewMI] = In;
311 }
312 }
313 }
314 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
315 InstrMap, LastStage, EpilogStage, i == 1);
316 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
317 InstrMap, LastStage, EpilogStage, i == 1);
318 PredBB = NewBB;
319
320 LLVM_DEBUG({
321 dbgs() << "epilog:\n";
322 NewBB->dump();
323 });
324 }
325
326 // Fix any Phi nodes in the loop exit block.
327 LoopExitBB->replacePhiUsesWith(BB, PredBB);
328
329 // Create a branch to the new epilog from the kernel.
330 // Remove the original branch and add a new branch to the epilog.
331 TII->removeBranch(*KernelBB);
332 assert((OrigBB == TBB || OrigBB == FBB) &&
333 "Unable to determine looping branch direction");
334 if (OrigBB != TBB)
335 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
336 else
337 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
338 // Add a branch to the loop exit.
339 if (EpilogBBs.size() > 0) {
340 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
342 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
343 }
344}
345
346/// Replace all uses of FromReg that appear outside the specified
347/// basic block with ToReg.
348static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
351 for (MachineOperand &O :
352 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
353 if (O.getParent()->getParent() != MBB)
354 O.setReg(ToReg);
355}
356
357/// Return true if the register has a use that occurs outside the
358/// specified loop.
361 for (const MachineOperand &MO : MRI.use_operands(Reg))
362 if (MO.getParent()->getParent() != BB)
363 return true;
364 return false;
365}
366
367/// Generate Phis for the specific block in the generated pipelined code.
368/// This function looks at the Phis from the original code to guide the
369/// creation of new Phis.
370void ModuloScheduleExpander::generateExistingPhis(
372 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
373 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
374 // Compute the stage number for the initial value of the Phi, which
375 // comes from the prolog. The prolog to use depends on to which kernel/
376 // epilog that we're adding the Phi.
377 unsigned PrologStage = 0;
378 unsigned PrevStage = 0;
379 bool InKernel = (LastStageNum == CurStageNum);
380 if (InKernel) {
381 PrologStage = LastStageNum - 1;
382 PrevStage = CurStageNum;
383 } else {
384 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
385 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
386 }
387
388 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
389 BBE = BB->getFirstNonPHI();
390 BBI != BBE; ++BBI) {
391 Register Def = BBI->getOperand(0).getReg();
392
393 Register InitVal;
394 Register LoopVal;
395 getPhiRegs(*BBI, BB, InitVal, LoopVal);
396
397 Register PhiOp1;
398 // The Phi value from the loop body typically is defined in the loop, but
399 // not always. So, we need to check if the value is defined in the loop.
400 Register PhiOp2 = LoopVal;
401 if (auto It = VRMap[LastStageNum].find(LoopVal);
402 It != VRMap[LastStageNum].end())
403 PhiOp2 = It->second;
404
405 int StageScheduled = Schedule.getStage(&*BBI);
406 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
407 unsigned NumStages = getStagesForReg(Def, CurStageNum);
408 if (NumStages == 0) {
409 // We don't need to generate a Phi anymore, but we need to rename any uses
410 // of the Phi value.
411 Register NewReg = VRMap[PrevStage][LoopVal];
412 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
413 InitVal, NewReg);
414 auto It = VRMap[CurStageNum].find(LoopVal);
415 if (It != VRMap[CurStageNum].end()) {
416 Register Reg = It->second;
417 VRMap[CurStageNum][Def] = Reg;
418 }
419 }
420 // Adjust the number of Phis needed depending on the number of prologs left,
421 // and the distance from where the Phi is first scheduled. The number of
422 // Phis cannot exceed the number of prolog stages. Each stage can
423 // potentially define two values.
424 unsigned MaxPhis = PrologStage + 2;
425 if (!InKernel && (int)PrologStage <= LoopValStage)
426 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
427 unsigned NumPhis = std::min(NumStages, MaxPhis);
428
429 Register NewReg;
430 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
431 // In the epilog, we may need to look back one stage to get the correct
432 // Phi name, because the epilog and prolog blocks execute the same stage.
433 // The correct name is from the previous block only when the Phi has
434 // been completely scheduled prior to the epilog, and Phi value is not
435 // needed in multiple stages.
436 int StageDiff = 0;
437 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
438 NumPhis == 1)
439 StageDiff = 1;
440 // Adjust the computations below when the phi and the loop definition
441 // are scheduled in different stages.
442 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
443 StageDiff = StageScheduled - LoopValStage;
444 for (unsigned np = 0; np < NumPhis; ++np) {
445 // If the Phi hasn't been scheduled, then use the initial Phi operand
446 // value. Otherwise, use the scheduled version of the instruction. This
447 // is a little complicated when a Phi references another Phi.
448 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
449 PhiOp1 = InitVal;
450 // Check if the Phi has already been scheduled in a prolog stage.
451 else if (PrologStage >= AccessStage + StageDiff + np &&
452 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
453 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
454 // Check if the Phi has already been scheduled, but the loop instruction
455 // is either another Phi, or doesn't occur in the loop.
456 else if (PrologStage >= AccessStage + StageDiff + np) {
457 // If the Phi references another Phi, we need to examine the other
458 // Phi to get the correct value.
459 PhiOp1 = LoopVal;
460 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
461 int Indirects = 1;
462 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
463 int PhiStage = Schedule.getStage(InstOp1);
464 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
465 PhiOp1 = getInitPhiReg(*InstOp1, BB);
466 else
467 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
468 InstOp1 = MRI.getVRegDef(PhiOp1);
469 int PhiOpStage = Schedule.getStage(InstOp1);
470 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
471 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {
472 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];
473 if (auto It = M.find(PhiOp1); It != M.end()) {
474 PhiOp1 = It->second;
475 break;
476 }
477 }
478 ++Indirects;
479 }
480 } else
481 PhiOp1 = InitVal;
482 // If this references a generated Phi in the kernel, get the Phi operand
483 // from the incoming block.
484 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
485 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
486 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
487
488 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
489 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
490 // In the epilog, a map lookup is needed to get the value from the kernel,
491 // or previous epilog block. How is does this depends on if the
492 // instruction is scheduled in the previous block.
493 if (!InKernel) {
494 int StageDiffAdj = 0;
495 if (LoopValStage != -1 && StageScheduled > LoopValStage)
496 StageDiffAdj = StageScheduled - LoopValStage;
497 // Use the loop value defined in the kernel, unless the kernel
498 // contains the last definition of the Phi.
499 if (np == 0 && PrevStage == LastStageNum &&
500 (StageScheduled != 0 || LoopValStage != 0) &&
501 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
502 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
503 // Use the value defined by the Phi. We add one because we switch
504 // from looking at the loop value to the Phi definition.
505 else if (np > 0 && PrevStage == LastStageNum &&
506 VRMap[PrevStage - np + 1].count(Def))
507 PhiOp2 = VRMap[PrevStage - np + 1][Def];
508 // Use the loop value defined in the kernel.
509 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
510 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
511 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
512 // Use the value defined by the Phi, unless we're generating the first
513 // epilog and the Phi refers to a Phi in a different stage.
514 else if (VRMap[PrevStage - np].count(Def) &&
515 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
516 (LoopValStage == StageScheduled)))
517 PhiOp2 = VRMap[PrevStage - np][Def];
518 }
519
520 // Check if we can reuse an existing Phi. This occurs when a Phi
521 // references another Phi, and the other Phi is scheduled in an
522 // earlier stage. We can try to reuse an existing Phi up until the last
523 // stage of the current Phi.
524 if (LoopDefIsPhi) {
525 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
526 int LVNumStages = getStagesForPhi(LoopVal);
527 int StageDiff = (StageScheduled - LoopValStage);
528 LVNumStages -= StageDiff;
529 // Make sure the loop value Phi has been processed already.
530 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
531 NewReg = PhiOp2;
532 unsigned ReuseStage = CurStageNum;
533 if (isLoopCarried(*PhiInst))
534 ReuseStage -= LVNumStages;
535 // Check if the Phi to reuse has been generated yet. If not, then
536 // there is nothing to reuse.
537 if (VRMap[ReuseStage - np].count(LoopVal)) {
538 NewReg = VRMap[ReuseStage - np][LoopVal];
539
540 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
541 Def, NewReg);
542 // Update the map with the new Phi name.
543 VRMap[CurStageNum - np][Def] = NewReg;
544 PhiOp2 = NewReg;
545 if (VRMap[LastStageNum - np - 1].count(LoopVal))
546 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
547
548 if (IsLast && np == NumPhis - 1)
549 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
550 continue;
551 }
552 }
553 }
554 if (InKernel && StageDiff > 0 &&
555 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
556 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
557 }
558
559 const TargetRegisterClass *RC = MRI.getRegClass(Def);
560 NewReg = MRI.createVirtualRegister(RC);
561
562 MachineInstrBuilder NewPhi =
563 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
564 TII->get(TargetOpcode::PHI), NewReg);
565 NewPhi.addReg(PhiOp1).addMBB(BB1);
566 NewPhi.addReg(PhiOp2).addMBB(BB2);
567 LIS.InsertMachineInstrInMaps(*NewPhi);
568 if (np == 0)
569 InstrMap[NewPhi] = &*BBI;
570
571 // We define the Phis after creating the new pipelined code, so
572 // we need to rename the Phi values in scheduled instructions.
573
574 Register PrevReg;
575 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
576 PrevReg = VRMap[PrevStage - np][LoopVal];
577 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
578 NewReg, PrevReg);
579 // If the Phi has been scheduled, use the new name for rewriting.
580 if (VRMap[CurStageNum - np].count(Def)) {
581 Register R = VRMap[CurStageNum - np][Def];
582 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
583 NewReg);
584 }
585
586 // Check if we need to rename any uses that occurs after the loop. The
587 // register to replace depends on whether the Phi is scheduled in the
588 // epilog.
589 if (IsLast && np == NumPhis - 1)
590 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
591
592 // In the kernel, a dependent Phi uses the value from this Phi.
593 if (InKernel)
594 PhiOp2 = NewReg;
595
596 // Update the map with the new Phi name.
597 VRMap[CurStageNum - np][Def] = NewReg;
598 }
599
600 while (NumPhis++ < NumStages) {
601 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
602 NewReg, 0);
603 }
604
605 // Check if we need to rename a Phi that has been eliminated due to
606 // scheduling.
607 if (NumStages == 0 && IsLast) {
608 auto &CurStageMap = VRMap[CurStageNum];
609 auto It = CurStageMap.find(LoopVal);
610 if (It != CurStageMap.end())
611 replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
612 }
613 }
614}
615
616/// Generate Phis for the specified block in the generated pipelined code.
617/// These are new Phis needed because the definition is scheduled after the
618/// use in the pipelined sequence.
619void ModuloScheduleExpander::generatePhis(
621 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
622 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
623 bool IsLast) {
624 // Compute the stage number that contains the initial Phi value, and
625 // the Phi from the previous stage.
626 unsigned PrologStage = 0;
627 unsigned PrevStage = 0;
628 unsigned StageDiff = CurStageNum - LastStageNum;
629 bool InKernel = (StageDiff == 0);
630 if (InKernel) {
631 PrologStage = LastStageNum - 1;
632 PrevStage = CurStageNum;
633 } else {
634 PrologStage = LastStageNum - StageDiff;
635 PrevStage = LastStageNum + StageDiff - 1;
636 }
637
638 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
639 BBE = BB->instr_end();
640 BBI != BBE; ++BBI) {
641 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
642 MachineOperand &MO = BBI->getOperand(i);
643 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
644 continue;
645
646 int StageScheduled = Schedule.getStage(&*BBI);
647 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
648 Register Def = MO.getReg();
649 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
650 // An instruction scheduled in stage 0 and is used after the loop
651 // requires a phi in the epilog for the last definition from either
652 // the kernel or prolog.
653 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
654 hasUseAfterLoop(Def, BB, MRI))
655 NumPhis = 1;
656 if (!InKernel && (unsigned)StageScheduled > PrologStage)
657 continue;
658
659 Register PhiOp2;
660 if (InKernel) {
661 PhiOp2 = VRMap[PrevStage][Def];
662 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
663 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
664 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
665 }
666 // The number of Phis can't exceed the number of prolog stages. The
667 // prolog stage number is zero based.
668 if (NumPhis > PrologStage + 1 - StageScheduled)
669 NumPhis = PrologStage + 1 - StageScheduled;
670 for (unsigned np = 0; np < NumPhis; ++np) {
671 // Example for
672 // Org:
673 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
674 //
675 // Prolog0 (Stage0):
676 // %Clone0 = ...
677 // Prolog1 (Stage1):
678 // %Clone1 = ...
679 // Kernel (Stage2):
680 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
681 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
682 // %Clone2 = ...
683 // Epilog0 (Stage3):
684 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
685 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
686 // Epilog1 (Stage4):
687 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
688 //
689 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
690 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
691 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
692
693 Register PhiOp1 = VRMap[PrologStage][Def];
694 if (np <= PrologStage)
695 PhiOp1 = VRMap[PrologStage - np][Def];
696 if (!InKernel) {
697 if (PrevStage == LastStageNum && np == 0)
698 PhiOp2 = VRMap[LastStageNum][Def];
699 else
700 PhiOp2 = VRMapPhi[PrevStage - np][Def];
701 }
702
703 const TargetRegisterClass *RC = MRI.getRegClass(Def);
704 Register NewReg = MRI.createVirtualRegister(RC);
705
706 MachineInstrBuilder NewPhi =
707 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
708 TII->get(TargetOpcode::PHI), NewReg);
709 NewPhi.addReg(PhiOp1).addMBB(BB1);
710 NewPhi.addReg(PhiOp2).addMBB(BB2);
711 LIS.InsertMachineInstrInMaps(*NewPhi);
712 if (np == 0)
713 InstrMap[NewPhi] = &*BBI;
714
715 // Rewrite uses and update the map. The actions depend upon whether
716 // we generating code for the kernel or epilog blocks.
717 if (InKernel) {
718 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
719 NewReg);
720 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
721 NewReg);
722
723 PhiOp2 = NewReg;
724 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
725 } else {
726 VRMapPhi[CurStageNum - np][Def] = NewReg;
727 if (np == NumPhis - 1)
728 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
729 NewReg);
730 }
731 if (IsLast && np == NumPhis - 1)
732 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
733 }
734 }
735 }
736}
737
738/// Remove instructions that generate values with no uses.
739/// Typically, these are induction variable operations that generate values
740/// used in the loop itself. A dead instruction has a definition with
741/// no uses, or uses that occur in the original loop only.
742void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
743 MBBVectorTy &EpilogBBs) {
744 // For each epilog block, check that the value defined by each instruction
745 // is used. If not, delete it.
746 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
748 ME = MBB->instr_rend();
749 MI != ME;) {
750 // From DeadMachineInstructionElem. Don't delete inline assembly.
751 if (MI->isInlineAsm()) {
752 ++MI;
753 continue;
754 }
755 bool SawStore = false;
756 // Check if it's safe to remove the instruction due to side effects.
757 // We can, and want to, remove Phis here.
758 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
759 ++MI;
760 continue;
761 }
762 bool used = true;
763 for (const MachineOperand &MO : MI->all_defs()) {
764 Register reg = MO.getReg();
765 // Assume physical registers are used, unless they are marked dead.
766 if (reg.isPhysical()) {
767 used = !MO.isDead();
768 if (used)
769 break;
770 continue;
771 }
772 unsigned realUses = 0;
773 for (const MachineOperand &U : MRI.use_operands(reg)) {
774 // Check if there are any uses that occur only in the original
775 // loop. If so, that's not a real use.
776 if (U.getParent()->getParent() != BB) {
777 realUses++;
778 used = true;
779 break;
780 }
781 }
782 if (realUses > 0)
783 break;
784 used = false;
785 }
786 if (!used) {
788 MI++->eraseFromParent();
789 continue;
790 }
791 ++MI;
792 }
793 // In the kernel block, check if we can remove a Phi that generates a value
794 // used in an instruction removed in the epilog block.
795 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
796 Register reg = MI.getOperand(0).getReg();
797 if (MRI.use_begin(reg) == MRI.use_end()) {
799 MI.eraseFromParent();
800 }
801 }
802}
803
804/// For loop carried definitions, we split the lifetime of a virtual register
805/// that has uses past the definition in the next iteration. A copy with a new
806/// virtual register is inserted before the definition, which helps with
807/// generating a better register assignment.
808///
809/// v1 = phi(a, v2) v1 = phi(a, v2)
810/// v2 = phi(b, v3) v2 = phi(b, v3)
811/// v3 = .. v4 = copy v1
812/// .. = V1 v3 = ..
813/// .. = v4
814void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
815 MBBVectorTy &EpilogBBs) {
817 for (auto &PHI : KernelBB->phis()) {
818 Register Def = PHI.getOperand(0).getReg();
819 // Check for any Phi definition that used as an operand of another Phi
820 // in the same block.
822 E = MRI.use_instr_end();
823 I != E; ++I) {
824 if (I->isPHI() && I->getParent() == KernelBB) {
825 // Get the loop carried definition.
826 Register LCDef = getLoopPhiReg(PHI, KernelBB);
827 if (!LCDef)
828 continue;
829 MachineInstr *MI = MRI.getVRegDef(LCDef);
830 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
831 continue;
832 // Search through the rest of the block looking for uses of the Phi
833 // definition. If one occurs, then split the lifetime.
834 Register SplitReg;
836 KernelBB->instr_end()))
837 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
838 // We split the lifetime when we find the first use.
839 if (!SplitReg) {
840 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
841 MachineInstr *newCopy =
842 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
843 TII->get(TargetOpcode::COPY), SplitReg)
844 .addReg(Def);
845 LIS.InsertMachineInstrInMaps(*newCopy);
846 }
847 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
848 }
849 if (!SplitReg)
850 continue;
851 // Search through each of the epilog blocks for any uses to be renamed.
852 for (auto &Epilog : EpilogBBs)
853 for (auto &I : *Epilog)
854 if (I.readsRegister(Def, /*TRI=*/nullptr))
855 I.substituteRegister(Def, SplitReg, 0, *TRI);
856 break;
857 }
858 }
859 }
860}
861
862/// Remove the incoming block from the Phis in a basic block.
864 for (MachineInstr &MI : *BB) {
865 if (!MI.isPHI())
866 break;
867 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
868 if (MI.getOperand(i + 1).getMBB() == Incoming) {
869 MI.removeOperand(i + 1);
870 MI.removeOperand(i);
871 break;
872 }
873 }
874}
875
876/// Create branches from each prolog basic block to the appropriate epilog
877/// block. These edges are needed if the loop ends before reaching the
878/// kernel.
879void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
880 MBBVectorTy &PrologBBs,
881 MachineBasicBlock *KernelBB,
882 MBBVectorTy &EpilogBBs,
883 ValueMapTy *VRMap) {
884 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
885 MachineBasicBlock *LastPro = KernelBB;
886 MachineBasicBlock *LastEpi = KernelBB;
887
888 // Start from the blocks connected to the kernel and work "out"
889 // to the first prolog and the last epilog blocks.
890 unsigned MaxIter = PrologBBs.size() - 1;
891 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
892 // Add branches to the prolog that go to the corresponding
893 // epilog, and the fall-thru prolog/kernel block.
894 MachineBasicBlock *Prolog = PrologBBs[j];
895 MachineBasicBlock *Epilog = EpilogBBs[i];
896
898 std::optional<bool> StaticallyGreater =
899 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
900 unsigned numAdded = 0;
901 if (!StaticallyGreater) {
902 Prolog->addSuccessor(Epilog);
903 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
904 } else if (*StaticallyGreater == false) {
905 Prolog->addSuccessor(Epilog);
906 Prolog->removeSuccessor(LastPro);
907 LastEpi->removeSuccessor(Epilog);
908 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
909 removePhis(Epilog, LastEpi);
910 // Remove the blocks that are no longer referenced.
911 if (LastPro != LastEpi) {
912 for (auto &MI : *LastEpi)
914 LastEpi->clear();
915 LastEpi->eraseFromParent();
916 }
917 if (LastPro == KernelBB) {
918 LoopInfo->disposed(&LIS);
919 NewKernel = nullptr;
920 }
921 for (auto &MI : *LastPro)
923 LastPro->clear();
924 LastPro->eraseFromParent();
925 } else {
926 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
928 }
929 LastPro = Prolog;
930 LastEpi = Epilog;
932 E = Prolog->instr_rend();
933 I != E && numAdded > 0; ++I, --numAdded)
934 updateInstruction(&*I, false, j, 0, VRMap);
935 }
936
937 if (NewKernel) {
938 LoopInfo->setPreheader(PrologBBs[MaxIter]);
939 LoopInfo->adjustTripCount(-(MaxIter + 1));
940 }
941}
942
943/// Return true if we can compute the amount the instruction changes
944/// during each iteration. Set Delta to the amount of the change.
945bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
947 const MachineOperand *BaseOp;
948 int64_t Offset;
949 bool OffsetIsScalable;
950 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
951 return false;
952
953 // FIXME: This algorithm assumes instructions have fixed-size offsets.
954 if (OffsetIsScalable)
955 return false;
956
957 if (!BaseOp->isReg())
958 return false;
959
960 Register BaseReg = BaseOp->getReg();
961
963 // Check if there is a Phi. If so, get the definition in the loop.
964 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
965 if (BaseDef && BaseDef->isPHI()) {
966 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
967 BaseDef = MRI.getVRegDef(BaseReg);
968 }
969 if (!BaseDef)
970 return false;
971
972 int D = 0;
973 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
974 return false;
975
976 Delta = D;
977 return true;
978}
979
980/// Update the memory operand with a new offset when the pipeliner
981/// generates a new copy of the instruction that refers to a
982/// different memory location.
983void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
984 MachineInstr &OldMI,
985 unsigned Num) {
986 if (Num == 0)
987 return;
988 // If the instruction has memory operands, then adjust the offset
989 // when the instruction appears in different stages.
990 if (NewMI.memoperands_empty())
991 return;
993 for (MachineMemOperand *MMO : NewMI.memoperands()) {
994 // TODO: Figure out whether isAtomic is really necessary (see D57601).
995 if (MMO->isVolatile() || MMO->isAtomic() ||
996 (MMO->isInvariant() && MMO->isDereferenceable()) ||
997 (!MMO->getValue())) {
998 NewMMOs.push_back(MMO);
999 continue;
1000 }
1001 unsigned Delta;
1002 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
1003 int64_t AdjOffset = Delta * Num;
1004 NewMMOs.push_back(
1005 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
1006 } else {
1009 }
1010 }
1011 NewMI.setMemRefs(MF, NewMMOs);
1012}
1013
1014/// Clone the instruction for the new pipelined loop and update the
1015/// memory operands, if needed.
1016MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
1017 unsigned CurStageNum,
1018 unsigned InstStageNum) {
1019 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1020 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1021 return NewMI;
1022}
1023
1024/// Clone the instruction for the new pipelined loop. If needed, this
1025/// function updates the instruction using the values saved in the
1026/// InstrChanges structure.
1027MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1028 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1029 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1030 auto It = InstrChanges.find(OldMI);
1031 if (It != InstrChanges.end()) {
1032 std::pair<Register, int64_t> RegAndOffset = It->second;
1033 unsigned BasePos, OffsetPos;
1034 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1035 return nullptr;
1036 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1037 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1038 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1039 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1040 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1041 }
1042 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1043 return NewMI;
1044}
1045
1046/// Update the machine instruction with new virtual registers. This
1047/// function may change the definitions and/or uses.
1048void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1049 bool LastDef,
1050 unsigned CurStageNum,
1051 unsigned InstrStageNum,
1052 ValueMapTy *VRMap) {
1053 for (MachineOperand &MO : NewMI->operands()) {
1054 if (!MO.isReg() || !MO.getReg().isVirtual())
1055 continue;
1056 Register reg = MO.getReg();
1057 if (MO.isDef()) {
1058 // Create a new virtual register for the definition.
1059 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1060 Register NewReg = MRI.createVirtualRegister(RC);
1061 MO.setReg(NewReg);
1062 VRMap[CurStageNum][reg] = NewReg;
1063 if (LastDef)
1064 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1065 } else if (MO.isUse()) {
1066 MachineInstr *Def = MRI.getVRegDef(reg);
1067 // Compute the stage that contains the last definition for instruction.
1068 int DefStageNum = Schedule.getStage(Def);
1069 unsigned StageNum = CurStageNum;
1070 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1071 // Compute the difference in stages between the defintion and the use.
1072 unsigned StageDiff = (InstrStageNum - DefStageNum);
1073 // Make an adjustment to get the last definition.
1074 StageNum -= StageDiff;
1075 }
1076 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1077 MO.setReg(It->second);
1078 }
1079 }
1080}
1081
1082/// Return the instruction in the loop that defines the register.
1083/// If the definition is a Phi, then follow the Phi operand to
1084/// the instruction in the loop.
1085MachineInstr *ModuloScheduleExpander::findDefInLoop(Register Reg) {
1087 MachineInstr *Def = MRI.getVRegDef(Reg);
1088 while (Def->isPHI()) {
1089 if (!Visited.insert(Def).second)
1090 break;
1091 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1092 if (Def->getOperand(i + 1).getMBB() == BB) {
1093 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1094 break;
1095 }
1096 }
1097 return Def;
1098}
1099
1100/// Return the new name for the value from the previous stage.
1101Register ModuloScheduleExpander::getPrevMapVal(
1102 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,
1103 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1104 Register PrevVal;
1105 if (StageNum > PhiStage) {
1106 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1107 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1108 // The name is defined in the previous stage.
1109 PrevVal = VRMap[StageNum - 1][LoopVal];
1110 else if (VRMap[StageNum].count(LoopVal))
1111 // The previous name is defined in the current stage when the instruction
1112 // order is swapped.
1113 PrevVal = VRMap[StageNum][LoopVal];
1114 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1115 // The loop value hasn't yet been scheduled.
1116 PrevVal = LoopVal;
1117 else if (StageNum == PhiStage + 1)
1118 // The loop value is another phi, which has not been scheduled.
1119 PrevVal = getInitPhiReg(*LoopInst, BB);
1120 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1121 // The loop value is another phi, which has been scheduled.
1122 PrevVal =
1123 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1124 LoopStage, VRMap, BB);
1125 }
1126 return PrevVal;
1127}
1128
1129/// Rewrite the Phi values in the specified block to use the mappings
1130/// from the initial operand. Once the Phi is scheduled, we switch
1131/// to using the loop value instead of the Phi value, so those names
1132/// do not need to be rewritten.
1133void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1134 unsigned StageNum,
1135 ValueMapTy *VRMap,
1136 InstrMapTy &InstrMap) {
1137 for (auto &PHI : BB->phis()) {
1138 Register InitVal;
1139 Register LoopVal;
1140 getPhiRegs(PHI, BB, InitVal, LoopVal);
1141 Register PhiDef = PHI.getOperand(0).getReg();
1142
1143 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1144 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1145 unsigned NumPhis = getStagesForPhi(PhiDef);
1146 if (NumPhis > StageNum)
1147 NumPhis = StageNum;
1148 for (unsigned np = 0; np <= NumPhis; ++np) {
1149 Register NewVal =
1150 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1151 if (!NewVal)
1152 NewVal = InitVal;
1153 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1154 NewVal);
1155 }
1156 }
1157}
1158
1159/// Rewrite a previously scheduled instruction to use the register value
1160/// from the new instruction. Make sure the instruction occurs in the
1161/// basic block, and we don't change the uses in the new instruction.
1162void ModuloScheduleExpander::rewriteScheduledInstr(
1163 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1164 unsigned PhiNum, MachineInstr *Phi, Register OldReg, Register NewReg,
1165 Register PrevReg) {
1166 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1167 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1168 // Rewrite uses that have been scheduled already to use the new
1169 // Phi register.
1170 for (MachineOperand &UseOp :
1171 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1172 MachineInstr *UseMI = UseOp.getParent();
1173 if (UseMI->getParent() != BB)
1174 continue;
1175 if (UseMI->isPHI()) {
1176 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1177 continue;
1178 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1179 continue;
1180 }
1181 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1182 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1183 MachineInstr *OrigMI = OrigInstr->second;
1184 int StageSched = Schedule.getStage(OrigMI);
1185 int CycleSched = Schedule.getCycle(OrigMI);
1186 Register ReplaceReg;
1187 // This is the stage for the scheduled instruction.
1188 if (StagePhi == StageSched && Phi->isPHI()) {
1189 int CyclePhi = Schedule.getCycle(Phi);
1190 if (PrevReg && InProlog)
1191 ReplaceReg = PrevReg;
1192 else if (PrevReg && !isLoopCarried(*Phi) &&
1193 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1194 ReplaceReg = PrevReg;
1195 else
1196 ReplaceReg = NewReg;
1197 }
1198 // The scheduled instruction occurs before the scheduled Phi, and the
1199 // Phi is not loop carried.
1200 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1201 ReplaceReg = NewReg;
1202 if (StagePhi > StageSched && Phi->isPHI())
1203 ReplaceReg = NewReg;
1204 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1205 ReplaceReg = NewReg;
1206 if (ReplaceReg) {
1207 const TargetRegisterClass *NRC =
1208 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1209 if (NRC)
1210 UseOp.setReg(ReplaceReg);
1211 else {
1212 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1213 MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1214 TII->get(TargetOpcode::COPY), SplitReg)
1215 .addReg(ReplaceReg);
1216 UseOp.setReg(SplitReg);
1217 LIS.InsertMachineInstrInMaps(*newCopy);
1218 }
1219 }
1220 }
1221}
1222
1223bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1224 if (!Phi.isPHI())
1225 return false;
1226 int DefCycle = Schedule.getCycle(&Phi);
1227 int DefStage = Schedule.getStage(&Phi);
1228
1229 Register InitVal;
1230 Register LoopVal;
1231 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1232 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1233 if (!Use || Use->isPHI())
1234 return true;
1235 int LoopCycle = Schedule.getCycle(Use);
1236 int LoopStage = Schedule.getStage(Use);
1237 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1238}
1239
1240//===----------------------------------------------------------------------===//
1241// PeelingModuloScheduleExpander implementation
1242//===----------------------------------------------------------------------===//
1243// This is a reimplementation of ModuloScheduleExpander that works by creating
1244// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1245//===----------------------------------------------------------------------===//
1246
1247namespace {
1248// Remove any dead phis in MBB. Dead phis either have only one block as input
1249// (in which case they are the identity) or have no uses.
1250void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1251 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1252 bool Changed = true;
1253 while (Changed) {
1254 Changed = false;
1256 assert(MI.isPHI());
1257 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1258 if (LIS)
1260 MI.eraseFromParent();
1261 Changed = true;
1262 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1263 const TargetRegisterClass *ConstrainRegClass =
1264 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1265 MRI.getRegClass(MI.getOperand(0).getReg()));
1266 assert(ConstrainRegClass &&
1267 "Expected a valid constrained register class!");
1268 (void)ConstrainRegClass;
1269 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1270 MI.getOperand(1).getReg());
1271 if (LIS)
1273 MI.eraseFromParent();
1274 Changed = true;
1275 }
1276 }
1277 }
1278}
1279
1280/// Rewrites the kernel block in-place to adhere to the given schedule.
1281/// KernelRewriter holds all of the state required to perform the rewriting.
1282class KernelRewriter {
1283 ModuloSchedule &S;
1285 MachineBasicBlock *PreheaderBB, *ExitBB;
1287 const TargetInstrInfo *TII;
1288 LiveIntervals *LIS;
1289
1290 // Map from register class to canonical undef register for that class.
1292 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1293 // this map is only used when InitReg is non-undef.
1295 // Map from LoopReg to phi register where the InitReg is undef.
1297
1298 // Reg is used by MI. Return the new register MI should use to adhere to the
1299 // schedule. Insert phis as necessary.
1300 Register remapUse(Register Reg, MachineInstr &MI);
1301 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1302 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1303 // or will be chosen so as to share another phi.
1304 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1305 const TargetRegisterClass *RC = nullptr);
1306 // Create an undef register of the given register class.
1307 Register undef(const TargetRegisterClass *RC);
1308
1309public:
1310 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1311 LiveIntervals *LIS = nullptr);
1312 void rewrite();
1313};
1314} // namespace
1315
1316KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1317 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1318 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1319 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1320 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1321 PreheaderBB = *BB->pred_begin();
1322 if (PreheaderBB == BB)
1323 PreheaderBB = *std::next(BB->pred_begin());
1324}
1325
1326void KernelRewriter::rewrite() {
1327 // Rearrange the loop to be in schedule order. Note that the schedule may
1328 // contain instructions that are not owned by the loop block (InstrChanges and
1329 // friends), so we gracefully handle unowned instructions and delete any
1330 // instructions that weren't in the schedule.
1331 auto InsertPt = BB->getFirstTerminator();
1332 MachineInstr *FirstMI = nullptr;
1333 for (MachineInstr *MI : S.getInstructions()) {
1334 if (MI->isPHI())
1335 continue;
1336 if (MI->getParent())
1337 MI->removeFromParent();
1338 BB->insert(InsertPt, MI);
1339 if (!FirstMI)
1340 FirstMI = MI;
1341 }
1342 assert(FirstMI && "Failed to find first MI in schedule");
1343
1344 // At this point all of the scheduled instructions are between FirstMI
1345 // and the end of the block. Kill from the first non-phi to FirstMI.
1346 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1347 if (LIS)
1349 (I++)->eraseFromParent();
1350 }
1351
1352 // Now remap every instruction in the loop.
1353 for (MachineInstr &MI : *BB) {
1354 if (MI.isPHI() || MI.isTerminator())
1355 continue;
1356 for (MachineOperand &MO : MI.uses()) {
1357 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1358 continue;
1359 Register Reg = remapUse(MO.getReg(), MI);
1360 MO.setReg(Reg);
1361 }
1362 }
1363 EliminateDeadPhis(BB, MRI, LIS);
1364
1365 // Ensure a phi exists for all instructions that are either referenced by
1366 // an illegal phi or by an instruction outside the loop. This allows us to
1367 // treat remaps of these values the same as "normal" values that come from
1368 // loop-carried phis.
1369 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1370 if (MI->isPHI()) {
1371 Register R = MI->getOperand(0).getReg();
1372 phi(R);
1373 continue;
1374 }
1375
1376 for (MachineOperand &Def : MI->defs()) {
1377 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1378 if (MI.getParent() != BB) {
1379 phi(Def.getReg());
1380 break;
1381 }
1382 }
1383 }
1384 }
1385}
1386
1387Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1388 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1389 if (!Producer)
1390 return Reg;
1391
1392 int ConsumerStage = S.getStage(&MI);
1393 if (!Producer->isPHI()) {
1394 // Non-phi producers are simple to remap. Insert as many phis as the
1395 // difference between the consumer and producer stages.
1396 if (Producer->getParent() != BB)
1397 // Producer was not inside the loop. Use the register as-is.
1398 return Reg;
1399 int ProducerStage = S.getStage(Producer);
1400 assert(ConsumerStage != -1 &&
1401 "In-loop consumer should always be scheduled!");
1402 assert(ConsumerStage >= ProducerStage);
1403 unsigned StageDiff = ConsumerStage - ProducerStage;
1404
1405 for (unsigned I = 0; I < StageDiff; ++I)
1406 Reg = phi(Reg);
1407 return Reg;
1408 }
1409
1410 // First, dive through the phi chain to find the defaults for the generated
1411 // phis.
1413 Register LoopReg = Reg;
1414 auto LoopProducer = Producer;
1415 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1416 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1417 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1418 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1419 assert(LoopProducer);
1420 }
1421 int LoopProducerStage = S.getStage(LoopProducer);
1422
1423 std::optional<Register> IllegalPhiDefault;
1424
1425 if (LoopProducerStage == -1) {
1426 // Do nothing.
1427 } else if (LoopProducerStage > ConsumerStage) {
1428 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1429 // In addition, Consumer's cycle must be scheduled after Producer in the
1430 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1431 // functions.
1432#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1433 int LoopProducerCycle = S.getCycle(LoopProducer);
1434 int ConsumerCycle = S.getCycle(&MI);
1435#endif
1436 assert(LoopProducerCycle <= ConsumerCycle);
1437 assert(LoopProducerStage == ConsumerStage + 1);
1438 // Peel off the first phi from Defaults and insert a phi between producer
1439 // and consumer. This phi will not be at the front of the block so we
1440 // consider it illegal. It will only exist during the rewrite process; it
1441 // needs to exist while we peel off prologs because these could take the
1442 // default value. After that we can replace all uses with the loop producer
1443 // value.
1444 IllegalPhiDefault = Defaults.front();
1445 Defaults.erase(Defaults.begin());
1446 } else {
1447 assert(ConsumerStage >= LoopProducerStage);
1448 int StageDiff = ConsumerStage - LoopProducerStage;
1449 if (StageDiff > 0) {
1450 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1451 << " to " << (Defaults.size() + StageDiff) << "\n");
1452 // If we need more phis than we have defaults for, pad out with undefs for
1453 // the earliest phis, which are at the end of the defaults chain (the
1454 // chain is in reverse order).
1455 Defaults.resize(Defaults.size() + StageDiff,
1456 Defaults.empty() ? std::optional<Register>()
1457 : Defaults.back());
1458 }
1459 }
1460
1461 // Now we know the number of stages to jump back, insert the phi chain.
1462 auto DefaultI = Defaults.rbegin();
1463 while (DefaultI != Defaults.rend())
1464 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1465
1466 if (IllegalPhiDefault) {
1467 // The consumer optionally consumes LoopProducer in the same iteration
1468 // (because the producer is scheduled at an earlier cycle than the consumer)
1469 // or the initial value. To facilitate this we create an illegal block here
1470 // by embedding a phi in the middle of the block. We will fix this up
1471 // immediately prior to pruning.
1472 auto RC = MRI.getRegClass(Reg);
1473 Register R = MRI.createVirtualRegister(RC);
1474 MachineInstr *IllegalPhi =
1475 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1476 .addReg(*IllegalPhiDefault)
1477 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1478 .addReg(LoopReg)
1479 .addMBB(BB); // Block choice is arbitrary and has no effect.
1480 // Illegal phi should belong to the producer stage so that it can be
1481 // filtered correctly during peeling.
1482 S.setStage(IllegalPhi, LoopProducerStage);
1483 return R;
1484 }
1485
1486 return LoopReg;
1487}
1488
1489Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1490 const TargetRegisterClass *RC) {
1491 // If the init register is not undef, try and find an existing phi.
1492 if (InitReg) {
1493 auto I = Phis.find({LoopReg, *InitReg});
1494 if (I != Phis.end())
1495 return I->second;
1496 } else {
1497 for (auto &KV : Phis) {
1498 if (KV.first.first == LoopReg)
1499 return KV.second;
1500 }
1501 }
1502
1503 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1504 // find a phi that takes undef as input.
1505 auto I = UndefPhis.find(LoopReg);
1506 if (I != UndefPhis.end()) {
1507 Register R = I->second;
1508 if (!InitReg)
1509 // Found a phi taking undef as input, and this input is undef so return
1510 // without any more changes.
1511 return R;
1512 // Found a phi taking undef as input, so rewrite it to take InitReg.
1513 MachineInstr *MI = MRI.getVRegDef(R);
1514 MI->getOperand(1).setReg(*InitReg);
1515 Phis.insert({{LoopReg, *InitReg}, R});
1516 const TargetRegisterClass *ConstrainRegClass =
1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1519 (void)ConstrainRegClass;
1520 UndefPhis.erase(I);
1521 return R;
1522 }
1523
1524 // Failed to find any existing phi to reuse, so create a new one.
1525 if (!RC)
1526 RC = MRI.getRegClass(LoopReg);
1527 Register R = MRI.createVirtualRegister(RC);
1528 if (InitReg) {
1529 const TargetRegisterClass *ConstrainRegClass =
1530 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1531 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1532 (void)ConstrainRegClass;
1533 }
1534 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1535 .addReg(InitReg ? *InitReg : undef(RC))
1536 .addMBB(PreheaderBB)
1537 .addReg(LoopReg)
1538 .addMBB(BB);
1539 if (!InitReg)
1540 UndefPhis[LoopReg] = R;
1541 else
1542 Phis[{LoopReg, *InitReg}] = R;
1543 return R;
1544}
1545
1546Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1547 Register &R = Undefs[RC];
1548 if (R == 0) {
1549 // Create an IMPLICIT_DEF that defines this register if we need it.
1550 // All uses of this should be removed by the time we have finished unrolling
1551 // prologs and epilogs.
1552 R = MRI.createVirtualRegister(RC);
1553 auto *InsertBB = &PreheaderBB->getParent()->front();
1554 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1555 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1556 }
1557 return R;
1558}
1559
1560namespace {
1561/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1562/// the operand are discovered, such as how many in-loop PHIs it has to jump
1563/// through and defaults for these phis.
1564class KernelOperandInfo {
1567 SmallVector<Register, 4> PhiDefaults;
1570
1571public:
1572 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1573 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1574 : MRI(MRI) {
1575 Source = MO;
1576 BB = MO->getParent()->getParent();
1577 while (isRegInLoop(MO)) {
1578 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1579 if (MI->isFullCopy()) {
1580 MO = &MI->getOperand(1);
1581 continue;
1582 }
1583 if (!MI->isPHI())
1584 break;
1585 // If this is an illegal phi, don't count it in distance.
1586 if (IllegalPhis.count(MI)) {
1587 MO = &MI->getOperand(3);
1588 continue;
1589 }
1590
1592 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1593 : &MI->getOperand(3);
1594 PhiDefaults.push_back(Default);
1595 }
1596 Target = MO;
1597 }
1598
1599 bool operator==(const KernelOperandInfo &Other) const {
1600 return PhiDefaults.size() == Other.PhiDefaults.size();
1601 }
1602
1603 void print(raw_ostream &OS) const {
1604 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1605 << *Source->getParent();
1606 }
1607
1608private:
1609 bool isRegInLoop(MachineOperand *MO) {
1610 return MO->isReg() && MO->getReg().isVirtual() &&
1611 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1612 }
1613};
1614} // namespace
1615
1619 if (LPD == LPD_Front)
1620 PeeledFront.push_back(NewBB);
1621 else
1622 PeeledBack.push_front(NewBB);
1623 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1624 ++I, ++NI) {
1625 CanonicalMIs[&*I] = &*I;
1626 CanonicalMIs[&*NI] = &*I;
1627 BlockMIs[{NewBB, &*I}] = &*NI;
1628 BlockMIs[{BB, &*I}] = &*I;
1629 }
1630 return NewBB;
1631}
1632
1634 int MinStage) {
1635 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1636 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1637 MachineInstr *MI = &*I++;
1638 int Stage = getStage(MI);
1639 if (Stage == -1 || Stage >= MinStage)
1640 continue;
1641
1642 for (MachineOperand &DefMO : MI->defs()) {
1644 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1645 // Only PHIs can use values from this block by construction.
1646 // Match with the equivalent PHI in B.
1647 assert(UseMI.isPHI());
1648 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1649 MI->getParent());
1650 Subs.emplace_back(&UseMI, Reg);
1651 }
1652 for (auto &Sub : Subs)
1653 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1655 }
1656 if (LIS)
1658 MI->eraseFromParent();
1659 }
1660}
1661
1663 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1664 auto InsertPt = DestBB->getFirstNonPHI();
1667 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1668 if (MI.isPHI()) {
1669 // This is an illegal PHI. If we move any instructions using an illegal
1670 // PHI, we need to create a legal Phi.
1671 if (getStage(&MI) != Stage) {
1672 // The legal Phi is not necessary if the illegal phi's stage
1673 // is being moved.
1674 Register PhiR = MI.getOperand(0).getReg();
1675 auto RC = MRI.getRegClass(PhiR);
1677 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1678 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1679 .addReg(PhiR)
1680 .addMBB(SourceBB);
1681 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1683 Remaps[PhiR] = NR;
1684 }
1685 }
1686 if (getStage(&MI) != Stage)
1687 continue;
1688 MI.removeFromParent();
1689 DestBB->insert(InsertPt, &MI);
1690 auto *KernelMI = CanonicalMIs[&MI];
1691 BlockMIs[{DestBB, KernelMI}] = &MI;
1692 BlockMIs.erase({SourceBB, KernelMI});
1693 }
1695 for (MachineInstr &MI : DestBB->phis()) {
1696 assert(MI.getNumOperands() == 3);
1697 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1698 // If the instruction referenced by the phi is moved inside the block
1699 // we don't need the phi anymore.
1700 if (getStage(Def) == Stage) {
1701 Register PhiReg = MI.getOperand(0).getReg();
1702 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1703 /*TRI=*/nullptr) != -1);
1704 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1705 MI.getOperand(0).setReg(PhiReg);
1706 PhiToDelete.push_back(&MI);
1707 }
1708 }
1709 for (auto *P : PhiToDelete)
1710 P->eraseFromParent();
1711 InsertPt = DestBB->getFirstNonPHI();
1712 // Helper to clone Phi instructions into the destination block. We clone Phi
1713 // greedily to avoid combinatorial explosion of Phi instructions.
1714 auto clonePhi = [&](MachineInstr *Phi) {
1715 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1716 DestBB->insert(InsertPt, NewMI);
1717 Register OrigR = Phi->getOperand(0).getReg();
1719 NewMI->getOperand(0).setReg(R);
1720 NewMI->getOperand(1).setReg(OrigR);
1721 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1722 Remaps[OrigR] = R;
1723 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1724 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1726 return R;
1727 };
1728 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1729 for (MachineOperand &MO : I->uses()) {
1730 if (!MO.isReg())
1731 continue;
1732 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1733 MO.setReg(It->second);
1734 else {
1735 // If we are using a phi from the source block we need to add a new phi
1736 // pointing to the old one.
1738 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1739 Register R = clonePhi(Use);
1740 MO.setReg(R);
1741 }
1742 }
1743 }
1744 }
1745}
1746
1749 MachineInstr *Phi) {
1750 unsigned distance = PhiNodeLoopIteration[Phi];
1751 MachineInstr *CanonicalUse = CanonicalPhi;
1752 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1753 for (unsigned I = 0; I < distance; ++I) {
1754 assert(CanonicalUse->isPHI());
1755 assert(CanonicalUse->getNumOperands() == 5);
1756 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1757 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1758 std::swap(LoopRegIdx, InitRegIdx);
1759 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1760 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1761 }
1762 return CanonicalUseReg;
1763}
1764
1766 BitVector LS(Schedule.getNumStages(), true);
1767 BitVector AS(Schedule.getNumStages(), true);
1768 LiveStages[BB] = LS;
1769 AvailableStages[BB] = AS;
1770
1771 // Peel out the prologs.
1772 LS.reset();
1773 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1774 LS[I] = true;
1775 Prologs.push_back(peelKernel(LPD_Front));
1776 LiveStages[Prologs.back()] = LS;
1777 AvailableStages[Prologs.back()] = LS;
1778 }
1779
1780 // Create a block that will end up as the new loop exiting block (dominated by
1781 // all prologs and epilogs). It will only contain PHIs, in the same order as
1782 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1783 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1784 // property that any value deffed in BB but used outside of BB is used by a
1785 // PHI in the exiting block.
1787 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1788 // Push out the epilogs, again in reverse order.
1789 // We can't assume anything about the minumum loop trip count at this point,
1790 // so emit a fairly complex epilog.
1791
1792 // We first peel number of stages minus one epilogue. Then we remove dead
1793 // stages and reorder instructions based on their stage. If we have 3 stages
1794 // we generate first:
1795 // E0[3, 2, 1]
1796 // E1[3', 2']
1797 // E2[3'']
1798 // And then we move instructions based on their stages to have:
1799 // E0[3]
1800 // E1[2, 3']
1801 // E2[1, 2', 3'']
1802 // The transformation is legal because we only move instructions past
1803 // instructions of a previous loop iteration.
1804 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1805 Epilogs.push_back(peelKernel(LPD_Back));
1806 MachineBasicBlock *B = Epilogs.back();
1808 // Keep track at which iteration each phi belongs to. We need it to know
1809 // what version of the variable to use during prologue/epilogue stitching.
1810 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1811 for (MachineInstr &Phi : B->phis())
1813 }
1814 for (size_t I = 0; I < Epilogs.size(); I++) {
1815 LS.reset();
1816 for (size_t J = I; J < Epilogs.size(); J++) {
1817 int Iteration = J;
1818 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1819 // Move stage one block at a time so that Phi nodes are updated correctly.
1820 for (size_t K = Iteration; K > I; K--)
1821 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1822 LS[Stage] = true;
1823 }
1824 LiveStages[Epilogs[I]] = LS;
1825 AvailableStages[Epilogs[I]] = AS;
1826 }
1827
1828 // Now we've defined all the prolog and epilog blocks as a fallthrough
1829 // sequence, add the edges that will be followed if the loop trip count is
1830 // lower than the number of stages (connecting prologs directly with epilogs).
1831 auto PI = Prologs.begin();
1832 auto EI = Epilogs.begin();
1833 assert(Prologs.size() == Epilogs.size());
1834 for (; PI != Prologs.end(); ++PI, ++EI) {
1835 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1836 (*PI)->addSuccessor(*EI);
1837 for (MachineInstr &MI : (*EI)->phis()) {
1838 Register Reg = MI.getOperand(1).getReg();
1840 if (Use && Use->getParent() == Pred) {
1841 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1842 if (CanonicalUse->isPHI()) {
1843 // If the use comes from a phi we need to skip as many phi as the
1844 // distance between the epilogue and the kernel. Trace through the phi
1845 // chain to find the right value.
1846 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1847 }
1848 Reg = getEquivalentRegisterIn(Reg, *PI);
1849 }
1850 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1851 MI.addOperand(MachineOperand::CreateMBB(*PI));
1852 }
1853 }
1854
1855 // Create a list of all blocks in order.
1858 Blocks.push_back(BB);
1860
1861 // Iterate in reverse order over all instructions, remapping as we go.
1862 for (MachineBasicBlock *B : reverse(Blocks)) {
1863 for (auto I = B->instr_rbegin();
1864 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1866 rewriteUsesOf(&*MI);
1867 }
1868 }
1869 for (auto *MI : IllegalPhisToDelete) {
1870 if (LIS)
1872 MI->eraseFromParent();
1873 }
1875
1876 // Now all remapping has been done, we're free to optimize the generated code.
1878 EliminateDeadPhis(B, MRI, LIS);
1879 EliminateDeadPhis(ExitingBB, MRI, LIS);
1880}
1881
1884 MachineBasicBlock *Exit = *BB->succ_begin();
1885 if (Exit == BB)
1886 Exit = *std::next(BB->succ_begin());
1887
1889 MF.insert(std::next(BB->getIterator()), NewBB);
1890
1891 // Clone all phis in BB into NewBB and rewrite.
1892 for (MachineInstr &MI : BB->phis()) {
1893 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1894 Register OldR = MI.getOperand(3).getReg();
1897 for (MachineInstr &Use : MRI.use_instructions(OldR))
1898 if (Use.getParent() != BB)
1899 Uses.push_back(&Use);
1900 for (MachineInstr *Use : Uses)
1901 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1903 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1904 .addReg(OldR)
1905 .addMBB(BB);
1906 BlockMIs[{NewBB, &MI}] = NI;
1907 CanonicalMIs[NI] = &MI;
1908 }
1909 BB->replaceSuccessor(Exit, NewBB);
1910 Exit->replacePhiUsesWith(BB, NewBB);
1911 NewBB->addSuccessor(Exit);
1912
1913 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1915 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1916 (void)CanAnalyzeBr;
1917 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1918 TII->removeBranch(*BB);
1919 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1920 Cond, DebugLoc());
1921 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1922 return NewBB;
1923}
1924
1927 MachineBasicBlock *BB) {
1929 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1930 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1931}
1932
1934 if (MI->isPHI()) {
1935 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1936 // and it is produced by this block.
1937 Register PhiR = MI->getOperand(0).getReg();
1938 Register R = MI->getOperand(3).getReg();
1939 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1940 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1941 R = MI->getOperand(1).getReg();
1942 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1943 MRI.replaceRegWith(PhiR, R);
1944 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1945 // later to figure out how to remap registers.
1946 MI->getOperand(0).setReg(PhiR);
1948 return;
1949 }
1950
1951 int Stage = getStage(MI);
1952 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1953 LiveStages[MI->getParent()].test(Stage))
1954 // Instruction is live, no rewriting to do.
1955 return;
1956
1957 for (MachineOperand &DefMO : MI->defs()) {
1959 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1960 // Only PHIs can use values from this block by construction.
1961 // Match with the equivalent PHI in B.
1962 assert(UseMI.isPHI());
1963 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1964 MI->getParent());
1965 Subs.emplace_back(&UseMI, Reg);
1966 }
1967 for (auto &Sub : Subs)
1968 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1970 }
1971 if (LIS)
1973 MI->eraseFromParent();
1974}
1975
1977 // Work outwards from the kernel.
1978 bool KernelDisposed = false;
1979 int TC = Schedule.getNumStages() - 1;
1980 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1981 ++PI, ++EI, --TC) {
1983 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1987 std::optional<bool> StaticallyGreater =
1988 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1989 if (!StaticallyGreater) {
1990 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1991 // Dynamically branch based on Cond.
1992 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1993 } else if (*StaticallyGreater == false) {
1994 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1995 // Prolog never falls through; branch to epilog and orphan interior
1996 // blocks. Leave it to unreachable-block-elim to clean up.
1997 Prolog->removeSuccessor(Fallthrough);
1998 for (MachineInstr &P : Fallthrough->phis()) {
1999 P.removeOperand(2);
2000 P.removeOperand(1);
2001 }
2003 KernelDisposed = true;
2004 } else {
2005 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
2006 // Prolog always falls through; remove incoming values in epilog.
2007 Prolog->removeSuccessor(Epilog);
2008 for (MachineInstr &P : Epilog->phis()) {
2009 P.removeOperand(4);
2010 P.removeOperand(3);
2011 }
2012 }
2013 }
2014
2015 if (!KernelDisposed) {
2016 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
2017 LoopInfo->setPreheader(Prologs.back());
2018 } else {
2019 LoopInfo->disposed();
2020 }
2021}
2022
2024 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2025 KR.rewrite();
2026}
2027
2034
2035 rewriteKernel();
2037 fixupBranches();
2038}
2039
2043
2044 // Dump the schedule before we invalidate and remap all its instructions.
2045 // Stash it in a string so we can print it if we found an error.
2046 std::string ScheduleDump;
2047 raw_string_ostream OS(ScheduleDump);
2048 Schedule.print(OS);
2049 OS.flush();
2050
2051 // First, run the normal ModuleScheduleExpander. We don't support any
2052 // InstrChanges.
2053 assert(LIS && "Requires LiveIntervals!");
2056 MSE.expand();
2057 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2058 if (!ExpandedKernel) {
2059 // The expander optimized away the kernel. We can't do any useful checking.
2060 MSE.cleanup();
2061 return;
2062 }
2063 // Before running the KernelRewriter, re-add BB into the CFG.
2065
2066 // Now run the new expansion algorithm.
2067 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2068 KR.rewrite();
2070
2071 // Collect all illegal phis that the new algorithm created. We'll give these
2072 // to KernelOperandInfo.
2074 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2075 if (NI->isPHI())
2076 IllegalPhis.insert(&*NI);
2077 }
2078
2079 // Co-iterate across both kernels. We expect them to be identical apart from
2080 // phis and full COPYs (we look through both).
2082 auto OI = ExpandedKernel->begin();
2083 auto NI = BB->begin();
2084 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2085 while (OI->isPHI() || OI->isFullCopy())
2086 ++OI;
2087 while (NI->isPHI() || NI->isFullCopy())
2088 ++NI;
2089 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2090 // Analyze every operand separately.
2091 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2092 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2093 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2094 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2095 }
2096
2097 bool Failed = false;
2098 for (auto &OldAndNew : KOIs) {
2099 if (OldAndNew.first == OldAndNew.second)
2100 continue;
2101 Failed = true;
2102 errs() << "Modulo kernel validation error: [\n";
2103 errs() << " [golden] ";
2104 OldAndNew.first.print(errs());
2105 errs() << " ";
2106 OldAndNew.second.print(errs());
2107 errs() << "]\n";
2108 }
2109
2110 if (Failed) {
2111 errs() << "Golden reference kernel:\n";
2112 ExpandedKernel->print(errs());
2113 errs() << "New kernel:\n";
2114 BB->print(errs());
2115 errs() << ScheduleDump;
2117 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2118 }
2119
2120 // Cleanup by removing BB from the CFG again as the original
2121 // ModuloScheduleExpander intended.
2123 MSE.cleanup();
2124}
2125
2126MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2127 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2128
2129 // TODO: Offset information needs to be corrected.
2130 NewMI->dropMemRefs(MF);
2131
2132 return NewMI;
2133}
2134
2135/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2136/// If it is already dedicated exit, return it. Otherwise, insert a new
2137/// block between them and return the new block.
2139 MachineBasicBlock *Exit,
2140 LiveIntervals &LIS) {
2141 if (Exit->pred_size() == 1)
2142 return Exit;
2143
2144 MachineFunction *MF = Loop->getParent();
2146
2147 MachineBasicBlock *NewExit =
2148 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2149 MF->insert(Loop->getIterator(), NewExit);
2150 LIS.insertMBBInMaps(NewExit);
2151
2152 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2154 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2155 if (TBB == Loop)
2156 FBB = NewExit;
2157 else if (FBB == Loop)
2158 TBB = NewExit;
2159 else
2160 llvm_unreachable("unexpected loop structure");
2162 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2163 Loop->replaceSuccessor(Exit, NewExit);
2164 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2165 NewExit->addSuccessor(Exit);
2166
2167 Exit->replacePhiUsesWith(Loop, NewExit);
2168
2169 return NewExit;
2170}
2171
2172/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2173/// remaining trip count for instructions in LastStage0Insts is greater than
2174/// RequiredTC, and to Otherwise otherwise.
2175void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2176 int RequiredTC,
2177 InstrMapTy &LastStage0Insts,
2178 MachineBasicBlock &GreaterThan,
2179 MachineBasicBlock &Otherwise) {
2181 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2182 LastStage0Insts);
2183
2185 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2186 // FBB for optimal performance.
2187 if (TII->reverseBranchCondition(Cond))
2188 llvm_unreachable("can not reverse branch condition");
2189 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2190 } else {
2191 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2192 }
2193}
2194
2195/// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2196/// other necessary blocks. The control flow is modified to execute the
2197/// pipelined loop if the trip count satisfies the condition, otherwise the
2198/// original loop. The original loop is also used to execute the remainder
2199/// iterations which occur due to unrolling.
2200void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2201 // The control flow for pipelining with MVE:
2202 //
2203 // OrigPreheader:
2204 // // The block that is originally the loop preheader
2205 // goto Check
2206 //
2207 // Check:
2208 // // Check whether the trip count satisfies the requirements to pipeline.
2209 // if (LoopCounter > NumStages + NumUnroll - 2)
2210 // // The minimum number of iterations to pipeline =
2211 // // iterations executed in prolog/epilog (NumStages-1) +
2212 // // iterations executed in one kernel run (NumUnroll)
2213 // goto Prolog
2214 // // fallback to the original loop
2215 // goto NewPreheader
2216 //
2217 // Prolog:
2218 // // All prolog stages. There are no direct branches to the epilogue.
2219 // goto NewKernel
2220 //
2221 // NewKernel:
2222 // // NumUnroll copies of the kernel
2223 // if (LoopCounter > MVE-1)
2224 // goto NewKernel
2225 // goto Epilog
2226 //
2227 // Epilog:
2228 // // All epilog stages.
2229 // if (LoopCounter > 0)
2230 // // The remainder is executed in the original loop
2231 // goto NewPreheader
2232 // goto NewExit
2233 //
2234 // NewPreheader:
2235 // // Newly created preheader for the original loop.
2236 // // The initial values of the phis in the loop are merged from two paths.
2237 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2238 // goto OrigKernel
2239 //
2240 // OrigKernel:
2241 // // The original loop block.
2242 // if (LoopCounter != 0)
2243 // goto OrigKernel
2244 // goto NewExit
2245 //
2246 // NewExit:
2247 // // Newly created dedicated exit for the original loop.
2248 // // Merge values which are referenced after the loop
2249 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2250 // goto OrigExit
2251 //
2252 // OrigExit:
2253 // // The block that is originally the loop exit.
2254 // // If it is already deicated exit, NewExit is not created.
2255
2256 // An example of where each stage is executed:
2257 // Assume #Stages 3, #MVE 4, #Iterations 12
2258 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2259 // -------------------------------------------------
2260 // Stage 0 Prolog#0
2261 // Stage 1 0 Prolog#1
2262 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2263 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2264 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2265 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2266 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2267 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2268 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2269 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2270 // Stage 2 1 Epilog#0
2271 // Stage 2 Epilog#1
2272 // Stage 0-2 OrigKernel
2273
2274 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2275 assert(LoopInfo && "Must be able to analyze loop!");
2276
2277 calcNumUnroll();
2278
2279 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2280 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2281 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2282 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2283 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2284
2285 MF.insert(OrigKernel->getIterator(), Check);
2286 LIS.insertMBBInMaps(Check);
2287 MF.insert(OrigKernel->getIterator(), Prolog);
2288 LIS.insertMBBInMaps(Prolog);
2289 MF.insert(OrigKernel->getIterator(), NewKernel);
2290 LIS.insertMBBInMaps(NewKernel);
2291 MF.insert(OrigKernel->getIterator(), Epilog);
2292 LIS.insertMBBInMaps(Epilog);
2293 MF.insert(OrigKernel->getIterator(), NewPreheader);
2294 LIS.insertMBBInMaps(NewPreheader);
2295
2296 NewExit = createDedicatedExit(OrigKernel, OrigExit, LIS);
2297
2298 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2299 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2300
2301 OrigPreheader->addSuccessor(Check);
2302 TII->removeBranch(*OrigPreheader);
2303 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2304
2305 Check->addSuccessor(Prolog);
2306 Check->addSuccessor(NewPreheader);
2307
2308 Prolog->addSuccessor(NewKernel);
2309
2310 NewKernel->addSuccessor(NewKernel);
2311 NewKernel->addSuccessor(Epilog);
2312
2313 Epilog->addSuccessor(NewPreheader);
2314 Epilog->addSuccessor(NewExit);
2315
2316 InstrMapTy LastStage0Insts;
2317 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2318 LastStage0Insts, *Prolog, *NewPreheader);
2319
2320 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2321 // register#
2322 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2323 generateProlog(PrologVRMap);
2324 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2325 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2326}
2327
2328/// Replace MI's use operands according to the maps.
2329void ModuloScheduleExpanderMVE::updateInstrUse(
2330 MachineInstr *MI, int StageNum, int PhaseNum,
2332 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2333 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2334 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2335 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2336 // Refer to the appropriate map according to the stage difference between
2337 // MI and the definition of an operand.
2338
2339 for (MachineOperand &UseMO : MI->uses()) {
2340 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2341 continue;
2342 int DiffStage = 0;
2343 Register OrigReg = UseMO.getReg();
2344 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2345 if (!DefInst || DefInst->getParent() != OrigKernel)
2346 continue;
2347 Register InitReg;
2348 Register DefReg = OrigReg;
2349 if (DefInst->isPHI()) {
2350 ++DiffStage;
2351 Register LoopReg;
2352 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2353 // LoopReg is guaranteed to be defined within the loop by canApply()
2354 DefReg = LoopReg;
2355 DefInst = MRI.getVRegDef(LoopReg);
2356 }
2357 unsigned DefStageNum = Schedule.getStage(DefInst);
2358 DiffStage += StageNum - DefStageNum;
2359 Register NewReg;
2360 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2361 // NewReg is defined in a previous phase of the same block
2362 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2363 else if (!PrevVRMap)
2364 // Since this is the first iteration, refer the initial register of the
2365 // loop
2366 NewReg = InitReg;
2367 else
2368 // Cases where DiffStage is larger than PhaseNum.
2369 // If MI is in the kernel block, the value is defined by the previous
2370 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2371 // value is defined in the kernel block and KernelVRMap is referenced.
2372 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2373
2374 const TargetRegisterClass *NRC =
2375 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2376 if (NRC)
2377 UseMO.setReg(NewReg);
2378 else {
2379 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2380 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),
2381 TII->get(TargetOpcode::COPY), SplitReg)
2382 .addReg(NewReg);
2383 LIS.InsertMachineInstrInMaps(*NewCopy);
2384 UseMO.setReg(SplitReg);
2385 }
2386 }
2387}
2388
2389/// Return a phi if Reg is referenced by the phi.
2390/// canApply() guarantees that at most only one such phi exists.
2392 for (MachineInstr &Phi : Loop->phis()) {
2393 Register InitVal, LoopVal;
2394 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2395 if (LoopVal == Reg)
2396 return &Phi;
2397 }
2398 return nullptr;
2399}
2400
2401/// Generate phis for registers defined by OrigMI.
2402void ModuloScheduleExpanderMVE::generatePhi(
2403 MachineInstr *OrigMI, int UnrollNum,
2404 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2405 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2406 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2407 int StageNum = Schedule.getStage(OrigMI);
2408 bool UsePrologReg;
2409 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2410 UsePrologReg = true;
2411 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2412 UsePrologReg = false;
2413 else
2414 return;
2415
2416 // Examples that show which stages are merged by phi.
2417 // Meaning of the symbol following the stage number:
2418 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2419 // +: Merged with the initial value (UsePrologReg == false)
2420 // *: No phis required
2421 //
2422 // #Stages 3, #MVE 4
2423 // Iter 0 1 2 3 4 5 6 7 8
2424 // -----------------------------------------
2425 // Stage 0a Prolog#0
2426 // Stage 1a 0b Prolog#1
2427 // Stage 2* 1* 0* Kernel Unroll#0
2428 // Stage 2* 1* 0+ Kernel Unroll#1
2429 // Stage 2* 1+ 0a Kernel Unroll#2
2430 // Stage 2+ 1a 0b Kernel Unroll#3
2431 //
2432 // #Stages 3, #MVE 2
2433 // Iter 0 1 2 3 4 5 6 7 8
2434 // -----------------------------------------
2435 // Stage 0a Prolog#0
2436 // Stage 1a 0b Prolog#1
2437 // Stage 2* 1+ 0a Kernel Unroll#0
2438 // Stage 2+ 1a 0b Kernel Unroll#1
2439 //
2440 // #Stages 3, #MVE 1
2441 // Iter 0 1 2 3 4 5 6 7 8
2442 // -----------------------------------------
2443 // Stage 0* Prolog#0
2444 // Stage 1a 0b Prolog#1
2445 // Stage 2+ 1a 0b Kernel Unroll#0
2446
2447 for (MachineOperand &DefMO : OrigMI->defs()) {
2448 if (!DefMO.isReg() || DefMO.isDead())
2449 continue;
2450 Register OrigReg = DefMO.getReg();
2451 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2452 if (NewReg == KernelVRMap[UnrollNum].end())
2453 continue;
2454 Register CorrespondReg;
2455 if (UsePrologReg) {
2456 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2457 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2458 } else {
2459 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2460 if (!Phi)
2461 continue;
2462 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2463 }
2464
2465 assert(CorrespondReg.isValid());
2466 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2467 MachineInstr *NewPhi =
2468 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2469 TII->get(TargetOpcode::PHI), PhiReg)
2470 .addReg(NewReg->second)
2471 .addMBB(NewKernel)
2472 .addReg(CorrespondReg)
2473 .addMBB(Prolog);
2474 LIS.InsertMachineInstrInMaps(*NewPhi);
2475 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2476 }
2477}
2478
2479static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2480 MachineBasicBlock *NewMBB) {
2481 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2482 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2483 Phi.getOperand(Idx).setReg(NewReg);
2484 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2485 return;
2486 }
2487 }
2488}
2489
2490/// Generate phis that merge values from multiple routes
2491void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2492 Register NewReg) {
2493 SmallVector<MachineOperand *> UsesAfterLoop;
2495 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2496 E = MRI.use_end();
2497 I != E; ++I) {
2498 MachineOperand &O = *I;
2499 if (O.getParent()->getParent() != OrigKernel &&
2500 O.getParent()->getParent() != Prolog &&
2501 O.getParent()->getParent() != NewKernel &&
2502 O.getParent()->getParent() != Epilog)
2503 UsesAfterLoop.push_back(&O);
2504 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2505 LoopPhis.push_back(O.getParent());
2506 }
2507
2508 // Merge the route that only execute the pipelined loop (when there are no
2509 // remaining iterations) with the route that execute the original loop.
2510 if (!UsesAfterLoop.empty()) {
2511 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2512 MachineInstr *NewPhi =
2513 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2514 TII->get(TargetOpcode::PHI), PhiReg)
2515 .addReg(OrigReg)
2516 .addMBB(OrigKernel)
2517 .addReg(NewReg)
2518 .addMBB(Epilog);
2519 LIS.InsertMachineInstrInMaps(*NewPhi);
2520
2521 for (MachineOperand *MO : UsesAfterLoop)
2522 MO->setReg(PhiReg);
2523
2524 // The interval of OrigReg is invalid and should be recalculated when
2525 // LiveInterval::getInterval() is called.
2526 if (LIS.hasInterval(OrigReg))
2527 LIS.removeInterval(OrigReg);
2528 }
2529
2530 // Merge routes from the pipelined loop and the bypassed route before the
2531 // original loop
2532 if (!LoopPhis.empty()) {
2533 for (MachineInstr *Phi : LoopPhis) {
2534 Register InitReg, LoopReg;
2535 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2536 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2537 MachineInstr *NewPhi =
2538 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),
2539 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)
2540 .addReg(InitReg)
2541 .addMBB(Check)
2542 .addReg(NewReg)
2543 .addMBB(Epilog);
2544 LIS.InsertMachineInstrInMaps(*NewPhi);
2545 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2546 }
2547 }
2548}
2549
2550void ModuloScheduleExpanderMVE::generateProlog(
2551 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2552 PrologVRMap.clear();
2553 PrologVRMap.resize(Schedule.getNumStages() - 1);
2555 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2556 ++PrologNum) {
2557 for (MachineInstr *MI : Schedule.getInstructions()) {
2558 if (MI->isPHI())
2559 continue;
2560 int StageNum = Schedule.getStage(MI);
2561 if (StageNum > PrologNum)
2562 continue;
2563 MachineInstr *NewMI = cloneInstr(MI);
2564 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2565 NewMIMap[NewMI] = {PrologNum, StageNum};
2566 Prolog->push_back(NewMI);
2567 LIS.InsertMachineInstrInMaps(*NewMI);
2568 }
2569 }
2570
2571 for (auto I : NewMIMap) {
2572 MachineInstr *MI = I.first;
2573 int PrologNum = I.second.first;
2574 int StageNum = I.second.second;
2575 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2576 }
2577
2578 LLVM_DEBUG({
2579 dbgs() << "prolog:\n";
2580 Prolog->dump();
2581 });
2582}
2583
2584void ModuloScheduleExpanderMVE::generateKernel(
2585 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2586 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2587 KernelVRMap.clear();
2588 KernelVRMap.resize(NumUnroll);
2589 SmallVector<ValueMapTy> PhiVRMap;
2590 PhiVRMap.resize(NumUnroll);
2592 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2593 for (MachineInstr *MI : Schedule.getInstructions()) {
2594 if (MI->isPHI())
2595 continue;
2596 int StageNum = Schedule.getStage(MI);
2597 MachineInstr *NewMI = cloneInstr(MI);
2598 if (UnrollNum == NumUnroll - 1)
2599 LastStage0Insts[MI] = NewMI;
2600 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2601 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2602 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2603 NewMIMap[NewMI] = {UnrollNum, StageNum};
2604 NewKernel->push_back(NewMI);
2605 LIS.InsertMachineInstrInMaps(*NewMI);
2606 }
2607 }
2608
2609 for (auto I : NewMIMap) {
2610 MachineInstr *MI = I.first;
2611 int UnrollNum = I.second.first;
2612 int StageNum = I.second.second;
2613 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2614 }
2615
2616 // If remaining trip count is greater than NumUnroll-1, loop continues
2617 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2618 *Epilog);
2619
2620 LLVM_DEBUG({
2621 dbgs() << "kernel:\n";
2622 NewKernel->dump();
2623 });
2624}
2625
2626void ModuloScheduleExpanderMVE::generateEpilog(
2627 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2628 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2629 EpilogVRMap.clear();
2630 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2632 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2633 ++EpilogNum) {
2634 for (MachineInstr *MI : Schedule.getInstructions()) {
2635 if (MI->isPHI())
2636 continue;
2637 int StageNum = Schedule.getStage(MI);
2638 if (StageNum <= EpilogNum)
2639 continue;
2640 MachineInstr *NewMI = cloneInstr(MI);
2641 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2642 NewMIMap[NewMI] = {EpilogNum, StageNum};
2643 Epilog->push_back(NewMI);
2644 LIS.InsertMachineInstrInMaps(*NewMI);
2645 }
2646 }
2647
2648 for (auto I : NewMIMap) {
2649 MachineInstr *MI = I.first;
2650 int EpilogNum = I.second.first;
2651 int StageNum = I.second.second;
2652 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2653 }
2654
2655 // If there are remaining iterations, they are executed in the original loop.
2656 // Instructions related to loop control, such as loop counter comparison,
2657 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2658 // in stage 0. Thus, the map is for the last one in the kernel.
2659 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2660
2661 LLVM_DEBUG({
2662 dbgs() << "epilog:\n";
2663 Epilog->dump();
2664 });
2665}
2666
2667/// Calculate the number of unroll required and set it to NumUnroll
2668void ModuloScheduleExpanderMVE::calcNumUnroll() {
2670 NumUnroll = 1;
2671 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2672 Inst2Idx[Schedule.getInstructions()[I]] = I;
2673
2674 for (MachineInstr *MI : Schedule.getInstructions()) {
2675 if (MI->isPHI())
2676 continue;
2677 int StageNum = Schedule.getStage(MI);
2678 for (const MachineOperand &MO : MI->uses()) {
2679 if (!MO.isReg() || !MO.getReg().isVirtual())
2680 continue;
2681 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2682 if (DefMI->getParent() != OrigKernel)
2683 continue;
2684
2685 int NumUnrollLocal = 1;
2686 if (DefMI->isPHI()) {
2687 ++NumUnrollLocal;
2688 // canApply() guarantees that DefMI is not phi and is an instruction in
2689 // the loop
2690 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2691 }
2692 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2693 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2694 --NumUnrollLocal;
2695 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2696 }
2697 }
2698 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2699}
2700
2701/// Create new virtual registers for definitions of NewMI and update NewMI.
2702/// If the definitions are referenced after the pipelined loop, phis are
2703/// created to merge with other routes.
2704void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2705 ValueMapTy &VRMap,
2706 bool LastDef) {
2707 for (MachineOperand &MO : NewMI->all_defs()) {
2708 if (!MO.getReg().isVirtual())
2709 continue;
2710 Register Reg = MO.getReg();
2711 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2712 Register NewReg = MRI.createVirtualRegister(RC);
2713 MO.setReg(NewReg);
2714 VRMap[Reg] = NewReg;
2715 if (LastDef)
2716 mergeRegUsesAfterPipeline(Reg, NewReg);
2717 }
2718}
2719
2721 OrigKernel = Schedule.getLoop()->getTopBlock();
2722 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2723 OrigExit = Schedule.getLoop()->getExitBlock();
2724
2725 LLVM_DEBUG(Schedule.dump());
2726
2727 generatePipelinedLoop();
2728}
2729
2730/// Check if ModuloScheduleExpanderMVE can be applied to L
2732 if (!L.getExitBlock()) {
2733 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2734 return false;
2735 }
2736
2737 MachineBasicBlock *BB = L.getTopBlock();
2739
2740 // Put some constraints on the operands of the phis to simplify the
2741 // transformation
2742 DenseSet<Register> UsedByPhi;
2743 for (MachineInstr &MI : BB->phis()) {
2744 // Registers defined by phis must be used only inside the loop and be never
2745 // used by phis.
2746 for (MachineOperand &MO : MI.defs())
2747 if (MO.isReg())
2748 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2749 if (Ref.getParent() != BB || Ref.isPHI()) {
2750 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2751 "referenced outside of the loop or by phi.\n");
2752 return false;
2753 }
2754
2755 // A source register from the loop block must be defined inside the loop.
2756 // A register defined inside the loop must be referenced by only one phi at
2757 // most.
2758 Register InitVal, LoopVal;
2759 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2760 if (!Register(LoopVal).isVirtual() ||
2761 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2762 LLVM_DEBUG(
2763 dbgs() << "Can not apply MVE expander: A phi source value coming "
2764 "from the loop is not defined in the loop.\n");
2765 return false;
2766 }
2767 if (UsedByPhi.count(LoopVal)) {
2768 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2769 "loop is referenced by two or more phis.\n");
2770 return false;
2771 }
2772 UsedByPhi.insert(LoopVal);
2773 }
2774
2775 return true;
2776}
2777
2778//===----------------------------------------------------------------------===//
2779// ModuloScheduleTestPass implementation
2780//===----------------------------------------------------------------------===//
2781// This pass constructs a ModuloSchedule from its module and runs
2782// ModuloScheduleExpander.
2783//
2784// The module is expected to contain a single-block analyzable loop.
2785// The total order of instructions is taken from the loop as-is.
2786// Instructions are expected to be annotated with a PostInstrSymbol.
2787// This PostInstrSymbol must have the following format:
2788// "Stage=%d Cycle=%d".
2789//===----------------------------------------------------------------------===//
2790
2791namespace {
2792class ModuloScheduleTest : public MachineFunctionPass {
2793public:
2794 static char ID;
2795
2796 ModuloScheduleTest() : MachineFunctionPass(ID) {
2798 }
2799
2800 bool runOnMachineFunction(MachineFunction &MF) override;
2801 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2802
2803 void getAnalysisUsage(AnalysisUsage &AU) const override {
2807 }
2808};
2809} // namespace
2810
2811char ModuloScheduleTest::ID = 0;
2812
2813INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2814 "Modulo Schedule test pass", false, false)
2817INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2818 "Modulo Schedule test pass", false, false)
2819
2820bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2821 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2822 for (auto *L : MLI) {
2823 if (L->getTopBlock() != L->getBottomBlock())
2824 continue;
2825 runOnLoop(MF, *L);
2826 return false;
2827 }
2828 return false;
2829}
2830
2831static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2832 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2833 std::pair<StringRef, StringRef> StageTokenAndValue =
2834 getToken(StageAndCycle.first, "-");
2835 std::pair<StringRef, StringRef> CycleTokenAndValue =
2836 getToken(StageAndCycle.second, "-");
2837 if (StageTokenAndValue.first != "Stage" ||
2838 CycleTokenAndValue.first != "_Cycle") {
2840 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2841 return;
2842 }
2843
2844 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2845 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2846
2847 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2848}
2849
2850void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2851 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2852 MachineBasicBlock *BB = L.getTopBlock();
2853 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2854
2856 std::vector<MachineInstr *> Instrs;
2857 for (MachineInstr &MI : *BB) {
2858 if (MI.isTerminator())
2859 continue;
2860 Instrs.push_back(&MI);
2861 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2862 dbgs() << "Parsing post-instr symbol for " << MI;
2863 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2864 }
2865 }
2866
2867 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2868 std::move(Stage));
2870 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2871 MSE.expand();
2872 MSE.cleanup();
2873}
2874
2875//===----------------------------------------------------------------------===//
2876// ModuloScheduleTestAnnotater implementation
2877//===----------------------------------------------------------------------===//
2878
2880 for (MachineInstr *MI : S.getInstructions()) {
2883 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2885 MI->setPostInstrSymbol(MF, Sym);
2886 }
2887}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:86
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Symbol * Sym
Definition: ELF_riscv.cpp:479
global merge Global merge function pass
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Register Reg
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static Register getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
modulo schedule test
static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)
Create a dedicated exit for Loop.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:74
iterator end()
Definition: DenseMap.h:87
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:203
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:64
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
mop_range defs()
Returns all explicit operands that are register definitions.
Definition: MachineInstr.h:724
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:754
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:810
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
LLVM_ABI void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
mop_range operands()
Definition: MachineInstr.h:693
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:780
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
LLVM_ABI MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
self_iterator getIterator()
Definition: ilist_node.h:134
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:692
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
constexpr double phi
Definition: MathExtras.h:61
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
@ Sub
Subtraction of integers.
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...