1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/SparseSet.h"
41#include "llvm/Config/llvm-config.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/Type.h"
45#include "llvm/IR/Value.h"
46#include "llvm/MC/LaneBitmask.h"
51#include "llvm/Support/Debug.h"
53#include "llvm/Support/Format.h"
55#include <algorithm>
56#include <cassert>
57#include <iterator>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "machine-scheduler"
64
65static cl::opt<bool>
66 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
67 cl::desc("Enable use of AA during MI DAG construction"));
68
69static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
70 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
71
72// Note: the two options below might be used in tuning compile time vs
73// output quality. Setting HugeRegion so large that it will never be
74// reached means best-effort, but may be slow.
75
76// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
77// together hold this many SUs, a reduction of maps will be done.
78static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
79 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
80 "prior to scheduling, at which point a trade-off "
81 "is made to avoid excessive compile time."));
82
83static cl::opt<unsigned> ReductionSize(
84 "dag-maps-reduction-size", cl::Hidden,
85 cl::desc("A huge scheduling region will have maps reduced by this many "
86 "nodes at a time. Defaults to HugeRegion / 2."));
87
88#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89static cl::opt<bool> SchedPrintCycles(
90 "sched-print-cycles", cl::Hidden, cl::init(false),
91 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
92#endif
93
94static unsigned getReductionSize() {
95 // Always reduce a huge region with half of the elements, except
96 // when user sets this number explicitly.
97 if (ReductionSize.getNumOccurrences() == 0)
98 return HugeRegion / 2;
99 return ReductionSize;
100}
101
102static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
103#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
104 dbgs() << "{ ";
105 for (const SUnit *SU : L) {
106 dbgs() << "SU(" << SU->NodeNum << ")";
107 if (SU != L.back())
108 dbgs() << ", ";
109 }
110 dbgs() << "}\n";
111#endif
112}
113
114ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
115 const MachineLoopInfo *mli,
116 bool RemoveKillFlags)
117 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
118 RemoveKillFlags(RemoveKillFlags),
119 UnknownValue(UndefValue::get(
120 Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
121 DbgValues.clear();
122
123 const TargetSubtargetInfo &ST = mf.getSubtarget();
124 SchedModel.init(&ST);
125}
126
127/// If this machine instr has memory reference information and it can be
128/// tracked to a normal reference to a known object, return the Value
129/// for that object. This function returns false if the memory location is
130/// unknown or may alias anything.
131static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
132 const MachineFrameInfo &MFI,
133 UnderlyingObjectsVector &Objects,
134 const DataLayout &DL) {
135 auto AllMMOsOkay = [&]() {
136 for (const MachineMemOperand *MMO : MI->memoperands()) {
137 // TODO: Figure out whether isAtomic is really necessary (see D57601).
138 if (MMO->isVolatile() || MMO->isAtomic())
139 return false;
140
141 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
142 // Functions that contain tail calls don't have unique PseudoSourceValue
143 // objects. Two PseudoSourceValues might refer to the same or
144 // overlapping locations. The client code calling this function assumes
145 // this is not the case. So return a conservative answer of no known
146 // object.
147 if (MFI.hasTailCall())
148 return false;
149
150 // For now, ignore PseudoSourceValues which may alias LLVM IR values
151 // because the code that uses this function has no way to cope with
152 // such aliases.
153 if (PSV->isAliased(&MFI))
154 return false;
155
156 bool MayAlias = PSV->mayAlias(&MFI);
157 Objects.emplace_back(PSV, MayAlias);
158 } else if (const Value *V = MMO->getValue()) {
160 if (!getUnderlyingObjectsForCodeGen(V, Objs))
161 return false;
162
163 for (Value *V : Objs) {
165 Objects.emplace_back(V, true);
166 }
167 } else
168 return false;
169 }
170 return true;
171 };
172
173 if (!AllMMOsOkay()) {
174 Objects.clear();
175 return false;
176 }
177
178 return true;
179}
180
181void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
182 BB = bb;
183}
184
185void ScheduleDAGInstrs::finishBlock() {
186 // Subclasses should no longer refer to the old block.
187 BB = nullptr;
188}
189
190void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
191 MachineBasicBlock::iterator begin,
192 MachineBasicBlock::iterator end,
193 unsigned regioninstrs) {
194 assert(bb == BB && "startBlock should set BB");
195 RegionBegin = begin;
196 RegionEnd = end;
197 NumRegionInstrs = regioninstrs;
198}
199
200void ScheduleDAGInstrs::exitRegion() {
201 // Nothing to do.
202}
203
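/// Wire up the ExitSU node: use the instruction that ends the region (if any),
/// record its physical and virtual register uses and, unless the region ends in
/// a call or barrier, treat every register live into the successor blocks as
/// used by ExitSU.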
204void ScheduleDAGInstrs::addSchedBarrierDeps() {
205 MachineInstr *ExitMI =
206 RegionEnd != BB->end()
207 ? &*skipDebugInstructionsBackward(std::prev(RegionEnd), RegionBegin)
208 : nullptr;
209 ExitSU.setInstr(ExitMI);
210 // Add dependencies on the defs and uses of the instruction.
211 if (ExitMI) {
212 const MCInstrDesc &MIDesc = ExitMI->getDesc();
213 for (const MachineOperand &MO : ExitMI->all_uses()) {
214 unsigned OpIdx = MO.getOperandNo();
215 Register Reg = MO.getReg();
216 if (Reg.isPhysical()) {
217 // addPhysRegDataDeps uses the provided operand index to retrieve
218 // the operand use cycle from the scheduling model. If the operand
219 // is "fake" (e.g., an operand of a call instruction used to pass
220 // an argument to the called function.), the scheduling model may not
221 // have an entry for it. If this is the case, pass -1 as operand index,
222 // which will cause addPhysRegDataDeps to add an artificial dependency.
223 // FIXME: Using hasImplicitUseOfPhysReg here is inaccurate as it misses
224 // aliases. When fixing, make sure to update addPhysRegDataDeps, too.
225 bool IsRealUse = OpIdx < MIDesc.getNumOperands() ||
226 MIDesc.hasImplicitUseOfPhysReg(Reg);
227 for (MCRegUnit Unit : TRI->regunits(Reg))
228 Uses.insert(PhysRegSUOper(&ExitSU, IsRealUse ? OpIdx : -1, Unit));
229 } else if (Reg.isVirtual() && MO.readsReg()) {
230 addVRegUseDeps(&ExitSU, OpIdx);
231 }
232 }
233 }
234 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
235 // For others, e.g. fallthrough, conditional branch, assume the exit
236 // uses all the registers that are livein to the successor blocks.
237 for (const MachineBasicBlock *Succ : BB->successors()) {
238 for (const auto &LI : Succ->liveins()) {
239 for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
240 auto [Unit, Mask] = *U;
241 if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
242 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
243 }
244 }
245 }
246 }
247}
248
249/// MO is an operand of SU's instruction that defines a physical register. Adds
250/// data dependencies from SU to any uses of the physical register.
251void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
252 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
253 assert(MO.isDef() && "expect physreg def");
254 Register Reg = MO.getReg();
255
256 // Ask the target if address-backscheduling is desirable, and if so how much.
257 const TargetSubtargetInfo &ST = MF.getSubtarget();
258
259 // Only use any non-zero latency for real defs/uses, in contrast to
260 // "fake" operands added by regalloc.
261 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
262 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
263 !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
264 for (MCRegUnit Unit : TRI->regunits(Reg)) {
265 for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
266 ++I) {
267 SUnit *UseSU = I->SU;
268 if (UseSU == SU)
269 continue;
270
271 // Adjust the dependence latency using operand def/use information,
272 // then allow the target to perform its own adjustments.
273 MachineInstr *UseInstr = nullptr;
274 int UseOpIdx = I->OpIdx;
275 bool ImplicitPseudoUse = false;
276 SDep Dep;
277 if (UseOpIdx < 0) {
278 Dep = SDep(SU, SDep::Artificial);
279 } else {
280 // Set the hasPhysRegDefs only for physreg defs that have a use within
281 // the scheduling region.
282 SU->hasPhysRegDefs = true;
283
284 UseInstr = UseSU->getInstr();
285 Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
286 const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
287 ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
288 !UseMIDesc.hasImplicitUseOfPhysReg(UseReg);
289
290 Dep = SDep(SU, SDep::Data, UseReg);
291 }
292 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
293 Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
294 UseInstr, UseOpIdx));
295 } else {
296 Dep.setLatency(0);
297 }
298 ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel);
299 UseSU->addPred(Dep);
300 }
301 }
302}
303
304/// Adds register dependencies (data, anti, and output) from this SUnit
305/// to following instructions in the same scheduling region that depend on the
306/// physical register referenced at OperIdx.
307void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
308 MachineInstr *MI = SU->getInstr();
309 MachineOperand &MO = MI->getOperand(OperIdx);
310 Register Reg = MO.getReg();
311 // We do not need to track any dependencies for constant registers.
312 if (MRI.isConstantPhysReg(Reg))
313 return;
314
315 const TargetSubtargetInfo &ST = MF.getSubtarget();
316
317 // Optionally add output and anti dependencies. For anti
318 // dependencies we use a latency of 0 because for a multi-issue
319 // target we want to allow the defining instruction to issue
320 // in the same cycle as the using instruction.
321 // TODO: Using a latency of 1 here for output dependencies assumes
322 // there's no cost for reusing registers.
323 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
324 for (MCRegUnit Unit : TRI->regunits(Reg)) {
325 for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
326 ++I) {
327 SUnit *DefSU = I->SU;
328 if (DefSU == &ExitSU)
329 continue;
330 MachineInstr *DefInstr = DefSU->getInstr();
331 MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
332 if (DefSU != SU &&
333 (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
334 SDep Dep(SU, Kind, DefMO.getReg());
335 if (Kind != SDep::Anti) {
336 Dep.setLatency(
337 SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
338 }
339 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep,
340 &SchedModel);
341 DefSU->addPred(Dep);
342 }
343 }
344 }
345
346 if (MO.isUse()) {
347 SU->hasPhysRegUses = true;
348 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
349 // retrieve the existing SUnits list for this register's uses.
350 // Push this SUnit on the use list.
351 for (MCRegUnit Unit : TRI->regunits(Reg))
352 Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
353 if (RemoveKillFlags)
354 MO.setIsKill(false);
355 } else {
356 addPhysRegDataDeps(SU, OperIdx);
357
358 // Clear previous uses and defs of this register and its subregisters.
359 for (MCRegUnit Unit : TRI->regunits(Reg)) {
360 Uses.eraseAll(Unit);
361 if (!MO.isDead())
362 Defs.eraseAll(Unit);
363 }
364
365 if (MO.isDead() && SU->isCall) {
366 // Calls will not be reordered because of chain dependencies (see
367 // below). Since call operands are dead, calls may continue to be added
368 // to the DefList making dependence checking quadratic in the size of
369 // the block. Instead, we leave only one call at the back of the
370 // DefList.
371 for (MCRegUnit Unit : TRI->regunits(Reg)) {
375 for (bool isBegin = I == B; !isBegin; /* empty */) {
376 isBegin = (--I) == B;
377 if (!I->SU->isCall)
378 break;
379 I = Defs.erase(I);
380 }
381 }
382 }
383
384 // Defs are pushed in the order they are visited and never reordered.
385 for (MCRegUnit Unit : TRI->regunits(Reg))
386 Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
387 }
388}
389
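/// Returns the lane mask covered by operand MO. Lane masks are only tracked
/// for register classes with disjunct subregisters; otherwise all lanes are
/// returned.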
390LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
391{
392 Register Reg = MO.getReg();
393 // No point in tracking lanemasks if we don't have interesting subregisters.
394 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
395 if (!RC.HasDisjunctSubRegs)
396 return LaneBitmask::getAll();
397
398 unsigned SubReg = MO.getSubReg();
399 if (SubReg == 0)
400 return RC.getLaneMask();
401 return TRI->getSubRegIndexLaneMask(SubReg);
402}
403
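/// Returns true if the dead def in MO overlaps no lanes that are currently
/// recorded as used in CurrentVRegUses.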
404bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
405 auto RegUse = CurrentVRegUses.find(MO.getReg());
406 if (RegUse == CurrentVRegUses.end())
407 return true;
408 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
409}
410
411/// Adds register output and data dependencies from this SUnit to instructions
412/// that occur later in the same scheduling region if they read from or write to
413/// the virtual register defined at OperIdx.
414///
415/// TODO: Hoist loop induction variable increments. This has to be
416/// reevaluated. Generally, IV scheduling should be done before coalescing.
417void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
418 MachineInstr *MI = SU->getInstr();
419 MachineOperand &MO = MI->getOperand(OperIdx);
420 Register Reg = MO.getReg();
421
422 LaneBitmask DefLaneMask;
423 LaneBitmask KillLaneMask;
424 if (TrackLaneMasks) {
425 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
426 DefLaneMask = getLaneMaskForMO(MO);
427 // If we have a <read-undef> flag, none of the lane values comes from an
428 // earlier instruction.
429 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
430
431 if (MO.getSubReg() != 0 && MO.isUndef()) {
432 // There may be other subregister defs on the same instruction of the same
433 // register in later operands. The lanes of other defs will now be live
434 // after this instruction, so these should not be treated as killed by the
435 // instruction even though they appear to be killed in this one operand.
436 for (const MachineOperand &OtherMO :
437 llvm::drop_begin(MI->operands(), OperIdx + 1))
438 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
439 KillLaneMask &= ~getLaneMaskForMO(OtherMO);
440 }
441
442 // Clear undef flag, we'll re-add it later once we know which subregister
443 // Def is first.
444 MO.setIsUndef(false);
445 } else {
446 DefLaneMask = LaneBitmask::getAll();
447 KillLaneMask = LaneBitmask::getAll();
448 }
449
450 if (MO.isDead()) {
451 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
452 } else {
453 // Add data dependence to all uses we found so far.
454 const TargetSubtargetInfo &ST = MF.getSubtarget();
455 for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
456 E = CurrentVRegUses.end(); I != E; /*empty*/) {
457 LaneBitmask LaneMask = I->LaneMask;
458 // Ignore uses of other lanes.
459 if ((LaneMask & KillLaneMask).none()) {
460 ++I;
461 continue;
462 }
463
464 if ((LaneMask & DefLaneMask).any()) {
465 SUnit *UseSU = I->SU;
466 MachineInstr *Use = UseSU->getInstr();
467 SDep Dep(SU, SDep::Data, Reg);
468 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
469 I->OperandIndex));
470 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep,
471 &SchedModel);
472 UseSU->addPred(Dep);
473 }
474
475 LaneMask &= ~KillLaneMask;
476 // If we found a Def for all lanes of this use, remove it from the list.
477 if (LaneMask.any()) {
478 I->LaneMask = LaneMask;
479 ++I;
480 } else
481 I = CurrentVRegUses.erase(I);
482 }
483 }
484
485 // Shortcut: Singly defined vregs do not have output/anti dependencies.
486 if (MRI.hasOneDef(Reg))
487 return;
488
489 // Add output dependence to the next nearest defs of this vreg.
490 //
491 // Unless this definition is dead, the output dependence should be
492 // transitively redundant with antidependencies from this definition's
493 // uses. We're conservative for now until we have a way to guarantee the uses
494 // are not eliminated sometime during scheduling. The output dependence edge
495 // is also useful if output latency exceeds def-use latency.
496 LaneBitmask LaneMask = DefLaneMask;
497 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
498 CurrentVRegDefs.end())) {
499 // Ignore defs for other lanes.
500 if ((V2SU.LaneMask & LaneMask).none())
501 continue;
502 // Add an output dependence.
503 SUnit *DefSU = V2SU.SU;
504 // Ignore additional defs of the same lanes in one instruction. This can
505 // happen because lanemasks are shared for targets with too many
506 // subregisters. We also use some representation tricks/hacks where we
507 // add super-register defs/uses, to imply that although we only access parts
508 // of the reg we care about the full one.
509 if (DefSU == SU)
510 continue;
511 SDep Dep(SU, SDep::Output, Reg);
512 Dep.setLatency(
513 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
514 DefSU->addPred(Dep);
515
516 // Update current definition. This can get tricky if the def was about a
517 // bigger lanemask before. We then have to shrink it and create a new
518 // VReg2SUnit for the non-overlapping part.
519 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
520 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
521 V2SU.SU = SU;
522 V2SU.LaneMask = OverlapMask;
523 if (NonOverlapMask.any())
524 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
525 }
526 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
527 if (LaneMask.any())
528 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
529}
530
531/// Adds a register data dependency if the instruction that defines the
532/// virtual register used at OperIdx is mapped to an SUnit. Add a register
533/// antidependency from this SUnit to instructions that occur later in the same
534/// scheduling region if they write the virtual register.
535///
536/// TODO: Handle ExitSU "uses" properly.
537void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
538 const MachineInstr *MI = SU->getInstr();
539 assert(!MI->isDebugOrPseudoInstr());
540
541 const MachineOperand &MO = MI->getOperand(OperIdx);
542 Register Reg = MO.getReg();
543
544 // Remember the use. Data dependencies will be added when we find the def.
545 LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
546 : LaneBitmask::getAll();
547 CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
548
549 // Add antidependences to the following defs of the vreg.
550 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
551 CurrentVRegDefs.end())) {
552 // Ignore defs for unrelated lanes.
553 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
554 if ((PrevDefLaneMask & LaneMask).none())
555 continue;
556 if (V2SU.SU == SU)
557 continue;
558
559 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
560 }
561}
562
563
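/// Adds a memory-order (MayAliasMem) edge from SUa to SUb unless the two
/// instructions' memory accesses are proven not to alias.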
564void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
565 unsigned Latency) {
566 if (SUa->getInstr()->mayAlias(getAAForDep(), *SUb->getInstr(), UseTBAA)) {
567 SDep Dep(SUa, SDep::MayAliasMem);
568 Dep.setLatency(Latency);
569 SUb->addPred(Dep);
570 }
571}
572
573/// Creates an SUnit for each real instruction, numbered in top-down
574 /// topological order. The instruction order A < B implies that no edge exists
575/// from B to A.
576///
577/// Map each real instruction to its SUnit.
578///
579/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
580/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
581/// instead of pointers.
582///
583/// MachineScheduler relies on initSUnits numbering the nodes by their order in
584/// the original instruction list.
585void ScheduleDAGInstrs::initSUnits() {
586 // We'll be allocating one SUnit for each real instruction in the region,
587 // which is contained within a basic block.
588 SUnits.reserve(NumRegionInstrs);
589
590 for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
591 if (MI.isDebugOrPseudoInstr())
592 continue;
593
594 SUnit *SU = newSUnit(&MI);
595 MISUnitMap[&MI] = SU;
596
597 SU->isCall = MI.isCall();
598 SU->isCommutable = MI.isCommutable();
599
600 // Assign the Latency field of SU using target-provided information.
601 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
602
603 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
604 //
605 // Reserved resources block an instruction from issuing and stall the
606 // entire pipeline. These are identified by BufferSize=0.
607 //
608 // Unbuffered resources prevent execution of subsequent instructions that
609 // require the same resources. This is used for in-order execution pipelines
610 // within an out-of-order core. These are identified by BufferSize=1.
611 if (SchedModel.hasInstrSchedModel()) {
612 const MCSchedClassDesc *SC = getSchedClass(SU);
613 for (const MCWriteProcResEntry &PRE :
614 make_range(SchedModel.getWriteProcResBegin(SC),
615 SchedModel.getWriteProcResEnd(SC))) {
616 switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
617 case 0:
618 SU->hasReservedResource = true;
619 break;
620 case 1:
621 SU->isUnbuffered = true;
622 break;
623 default:
624 break;
625 }
626 }
627 }
628 }
629}
630
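/// Maps each underlying object (Value or PseudoSourceValue) to the list of
/// SUnits that access it, so chain dependencies only need to be added between
/// accesses that may refer to the same memory.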
631class ScheduleDAGInstrs::Value2SUsMap
632 : public SmallMapVector<ValueType, SUList, 4> {
633 /// Current total number of SUs in map.
634 unsigned NumNodes = 0;
635
636 /// 1 for loads, 0 for stores. (see comment in SUList)
637 unsigned TrueMemOrderLatency;
638
639public:
640 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
641
642 /// To keep NumNodes up to date, insert() is used instead of
643 /// this operator w/ push_back().
644 ValueType &operator[](const SUList &Key) {
645 llvm_unreachable("Don't use. Use insert() instead."); };
646
647 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
648 /// reduce().
649 void inline insert(SUnit *SU, ValueType V) {
650 MapVector::operator[](V).push_back(SU);
651 NumNodes++;
652 }
653
654 /// Clears the list of SUs mapped to V.
655 void inline clearList(ValueType V) {
656 iterator Itr = find(V);
657 if (Itr != end()) {
658 assert(NumNodes >= Itr->second.size());
659 NumNodes -= Itr->second.size();
660
661 Itr->second.clear();
662 }
663 }
664
665 /// Clears map from all contents.
666 void clear() {
667 MapVector::clear();
668 NumNodes = 0;
669 }
670
671 unsigned inline size() const { return NumNodes; }
672
673 /// Counts the number of SUs in this map after a reduction.
674 void reComputeSize() {
675 NumNodes = 0;
676 for (auto &I : *this)
677 NumNodes += I.second.size();
678 }
679
680 unsigned inline getTrueMemOrderLatency() const {
681 return TrueMemOrderLatency;
682 }
683
684 void dump();
685};
686
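/// Adds chain dependencies from SU to every SUnit currently recorded in
/// Val2SUsMap, using the map's TrueMemOrderLatency.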
687void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
688 Value2SUsMap &Val2SUsMap) {
689 for (auto &I : Val2SUsMap)
690 addChainDependencies(SU, I.second,
691 Val2SUsMap.getTrueMemOrderLatency());
692}
693
694void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
695 Value2SUsMap &Val2SUsMap,
696 ValueType V) {
697 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
698 if (Itr != Val2SUsMap.end())
699 addChainDependencies(SU, Itr->second,
700 Val2SUsMap.getTrueMemOrderLatency());
701}
702
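/// Makes every SUnit currently recorded in the map a predecessor of
/// BarrierChain and clears the map.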
703void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
704 assert(BarrierChain != nullptr);
705
706 for (auto &[V, SUs] : map) {
707 (void)V;
708 for (auto *SU : SUs)
709 SU->addPredBarrier(BarrierChain);
710 }
711 map.clear();
712}
713
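/// Inserts BarrierChain into the map: every recorded SUnit below it (i.e. with
/// a higher NodeNum) becomes its successor and is pruned from the lists.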
714void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
715 assert(BarrierChain != nullptr);
716
717 // Go through all lists of SUs.
718 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
719 Value2SUsMap::iterator CurrItr = I++;
720 SUList &sus = CurrItr->second;
721 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
722 for (; SUItr != SUEE; ++SUItr) {
723 // Stop on BarrierChain or any instruction above it.
724 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
725 break;
726
727 (*SUItr)->addPredBarrier(BarrierChain);
728 }
729
730 // Also remove the BarrierChain from the list if present.
731 if (SUItr != SUEE && *SUItr == BarrierChain)
732 SUItr++;
733
734 // Remove all SUs that are now successors of BarrierChain.
735 if (SUItr != sus.begin())
736 sus.erase(sus.begin(), SUItr);
737 }
738
739 // Remove all entries with empty su lists.
740 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
741 return (mapEntry.second.empty()); });
742
743 // Recompute the size of the map (NumNodes).
744 map.reComputeSize();
745}
746
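/// Builds SUnits and adds all register and memory dependencies for the current
/// scheduling region, walking the instruction list bottom-up.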
747void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
748 RegPressureTracker *RPTracker,
749 PressureDiffs *PDiffs,
750 LiveIntervals *LIS,
751 bool TrackLaneMasks) {
752 const TargetSubtargetInfo &ST = MF.getSubtarget();
753 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? (bool)EnableAASchedMI
754 : ST.useAA();
755 if (UseAA && AA)
756 AAForDep.emplace(*AA);
757
758 BarrierChain = nullptr;
759
760 this->TrackLaneMasks = TrackLaneMasks;
761 MISUnitMap.clear();
763
764 // Create an SUnit for each real instruction.
765 initSUnits();
766
767 if (PDiffs)
768 PDiffs->init(SUnits.size());
769
770 // We build scheduling units by walking a block's instruction list
771 // from bottom to top.
772
773 // Each MI's memory operand(s) is analyzed to a list of underlying
774 // objects. The SU is then inserted in the SUList(s) mapped from the
775 // Value(s). Each Value thus gets mapped to lists of SUs depending
776 // on it, stores and loads kept separately. Two SUs are trivially
777 // non-aliasing if they both depend on only identified Values and do
778 // not share any common Value.
779 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
780
781 // Certain memory accesses are known to not alias any SU in Stores
782 // or Loads, and have therefore their own 'NonAlias'
783 // domain. E.g. spill / reload instructions never alias LLVM IR
784 // Values. It would be nice to assume that this type of memory
785 // accesses always have a proper memory operand modelling, and are
786 // therefore never unanalyzable, but this is conservatively not
787 // done.
788 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
789
790 // Track all instructions that may raise floating-point exceptions.
791 // These do not depend on one another (or normal loads or stores), but
792 // must not be rescheduled across global barriers. Note that we don't
793 // really need a "map" here since we don't track those MIs by value;
794 // using the same Value2SUsMap data type here is simply a matter of
795 // convenience.
796 Value2SUsMap FPExceptions;
797
798 // Remove any stale debug info; sometimes BuildSchedGraph is called again
799 // without emitting the info from the previous call.
800 DbgValues.clear();
801 FirstDbgValue = nullptr;
802
803 assert(Defs.empty() && Uses.empty() &&
804 "Only BuildGraph should update Defs/Uses");
807
808 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
809 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
810 unsigned NumVirtRegs = MRI.getNumVirtRegs();
811 CurrentVRegDefs.setUniverse(NumVirtRegs);
812 CurrentVRegUses.setUniverse(NumVirtRegs);
813
814 // Model data dependencies between instructions being scheduled and the
815 // ExitSU.
816 addSchedBarrierDeps();
817
818 // Walk the list of instructions, from bottom moving up.
819 MachineInstr *DbgMI = nullptr;
820 for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
821 MII != MIE; --MII) {
822 MachineInstr &MI = *std::prev(MII);
823 if (DbgMI) {
824 DbgValues.emplace_back(DbgMI, &MI);
825 DbgMI = nullptr;
826 }
827
828 if (MI.isDebugValue() || MI.isDebugPHI()) {
829 DbgMI = &MI;
830 continue;
831 }
832
833 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
834 continue;
835
836 SUnit *SU = MISUnitMap[&MI];
837 assert(SU && "No SUnit mapped to this MI");
838
839 if (RPTracker) {
840 RegisterOperands RegOpers;
841 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
842 if (TrackLaneMasks) {
843 SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
844 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
845 }
846 if (PDiffs != nullptr)
847 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
848
849 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
850 RPTracker->recedeSkipDebugValues();
851 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
852 RPTracker->recede(RegOpers);
853 }
854
855 assert(
856 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
857 "Cannot schedule terminators or labels!");
858
859 // Add register-based dependencies (data, anti, and output).
860 // For some instructions (calls, returns, inline-asm, etc.) there can
861 // be explicit uses and implicit defs, in which case the use will appear
862 // on the operand list before the def. Do two passes over the operand
863 // list to make sure that defs are processed before any uses.
864 bool HasVRegDef = false;
865 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
866 const MachineOperand &MO = MI.getOperand(j);
867 if (!MO.isReg() || !MO.isDef())
868 continue;
869 Register Reg = MO.getReg();
870 if (Reg.isPhysical()) {
871 addPhysRegDeps(SU, j);
872 } else if (Reg.isVirtual()) {
873 HasVRegDef = true;
874 addVRegDefDeps(SU, j);
875 }
876 }
877 // Now process all uses.
878 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
879 const MachineOperand &MO = MI.getOperand(j);
880 // Only look at use operands.
881 // We do not need to check for MO.readsReg() here because subsequent
882 // subregister defs will get output dependence edges and need no
883 // additional use dependencies.
884 if (!MO.isReg() || !MO.isUse())
885 continue;
886 Register Reg = MO.getReg();
887 if (Reg.isPhysical()) {
888 addPhysRegDeps(SU, j);
889 } else if (Reg.isVirtual() && MO.readsReg()) {
890 addVRegUseDeps(SU, j);
891 }
892 }
893
894 // If we haven't seen any uses in this scheduling region, create a
895 // dependence edge to ExitSU to model the live-out latency. This is required
896 // for vreg defs with no in-region use, and prefetches with no vreg def.
897 //
898 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
899 // check currently relies on being called before adding chain deps.
900 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
901 SDep Dep(SU, SDep::Artificial);
902 Dep.setLatency(SU->Latency - 1);
903 ExitSU.addPred(Dep);
904 }
905
906 // Add memory dependencies (Note: isStoreToStackSlot and
907 // isLoadFromStackSlot are not usable after stack slots are lowered to
908 // actual addresses).
909
910 const TargetInstrInfo *TII = ST.getInstrInfo();
911 // This is a barrier event that acts as a pivotal node in the DAG.
912 if (TII->isGlobalMemoryObject(&MI)) {
913
914 // Become the barrier chain.
915 if (BarrierChain)
916 BarrierChain->addPredBarrier(SU);
917 BarrierChain = SU;
918
919 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
920 << BarrierChain->NodeNum << ").\n");
921
922 // Add dependencies against everything below it and clear maps.
923 addBarrierChain(Stores);
924 addBarrierChain(Loads);
925 addBarrierChain(NonAliasStores);
926 addBarrierChain(NonAliasLoads);
927 addBarrierChain(FPExceptions);
928
929 continue;
930 }
931
932 // Instructions that may raise FP exceptions may not be moved
933 // across any global barriers.
934 if (MI.mayRaiseFPException()) {
935 if (BarrierChain)
936 BarrierChain->addPredBarrier(SU);
937
938 FPExceptions.insert(SU, UnknownValue);
939
940 if (FPExceptions.size() >= HugeRegion) {
941 LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n");
942 Value2SUsMap empty;
943 reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
944 }
945 }
946
947 // If it's not a store or a variant load, we're done.
948 if (!MI.mayStore() &&
949 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
950 continue;
951
952 // Always add a dependency edge to BarrierChain if present.
953 if (BarrierChain)
954 BarrierChain->addPredBarrier(SU);
955
956 // Find the underlying objects for MI. The Objs vector is either
957 // empty, or filled with the Values of memory locations which this
958 // SU depends on.
959 UnderlyingObjectsVector Objs;
960 bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
961 MF.getDataLayout());
962
963 if (MI.mayStore()) {
964 if (!ObjsFound) {
965 // An unknown store depends on all stores and loads.
966 addChainDependencies(SU, Stores);
967 addChainDependencies(SU, NonAliasStores);
968 addChainDependencies(SU, Loads);
969 addChainDependencies(SU, NonAliasLoads);
970
971 // Map this store to 'UnknownValue'.
972 Stores.insert(SU, UnknownValue);
973 } else {
974 // Add precise dependencies against all previously seen memory
975 // accesses mapped to the same Value(s).
976 for (const UnderlyingObject &UnderlObj : Objs) {
977 ValueType V = UnderlObj.getValue();
978 bool ThisMayAlias = UnderlObj.mayAlias();
979
980 // Add dependencies to previous stores and loads mapped to V.
981 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
982 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
983 }
984 // Update the store map after all chains have been added to avoid adding
985 // self-loop edge if multiple underlying objects are present.
986 for (const UnderlyingObject &UnderlObj : Objs) {
987 ValueType V = UnderlObj.getValue();
988 bool ThisMayAlias = UnderlObj.mayAlias();
989
990 // Map this store to V.
991 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
992 }
993 // The store may have dependencies to unanalyzable loads and
994 // stores.
995 addChainDependencies(SU, Loads, UnknownValue);
996 addChainDependencies(SU, Stores, UnknownValue);
997 }
998 } else { // SU is a load.
999 if (!ObjsFound) {
1000 // An unknown load depends on all stores.
1001 addChainDependencies(SU, Stores);
1002 addChainDependencies(SU, NonAliasStores);
1003
1004 Loads.insert(SU, UnknownValue);
1005 } else {
1006 for (const UnderlyingObject &UnderlObj : Objs) {
1007 ValueType V = UnderlObj.getValue();
1008 bool ThisMayAlias = UnderlObj.mayAlias();
1009
1010 // Add precise dependencies against all previously seen stores
1011 // mapping to the same Value(s).
1012 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
1013
1014 // Map this load to V.
1015 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1016 }
1017 // The load may have dependencies to unanalyzable stores.
1018 addChainDependencies(SU, Stores, UnknownValue);
1019 }
1020 }
1021
1022 // Reduce maps if they grow huge.
1023 if (Stores.size() + Loads.size() >= HugeRegion) {
1024 LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n");
1025 reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
1026 }
1027 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1028 LLVM_DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n");
1029 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
1030 }
1031 }
1032
1033 if (DbgMI)
1034 FirstDbgValue = DbgMI;
1035
1036 Defs.clear();
1037 Uses.clear();
1038 CurrentVRegDefs.clear();
1039 CurrentVRegUses.clear();
1040
1041 Topo.MarkDirty();
1042}
1043
1044static raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue *PSV) {
1045 PSV->printCustom(OS);
1046 return OS;
1047}
1048
1049void ScheduleDAGInstrs::Value2SUsMap::dump() {
1050 for (const auto &[ValType, SUs] : *this) {
1051 if (isa<const Value *>(ValType)) {
1052 const Value *V = cast<const Value *>(ValType);
1053 if (isa<UndefValue>(V))
1054 dbgs() << "Unknown";
1055 else
1056 V->printAsOperand(dbgs());
1057 } else if (isa<const PseudoSourceValue *>(ValType))
1058 dbgs() << cast<const PseudoSourceValue *>(ValType);
1059 else
1060 llvm_unreachable("Unknown Value type.");
1061
1062 dbgs() << " : ";
1063 dumpSUList(SUs);
1064 }
1065}
1066
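/// Reduces huge stores/loads maps to keep DAG construction from becoming
/// quadratic: the N highest-numbered SUs are folded behind a (possibly new)
/// BarrierChain node and dropped from the maps.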
1067void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1068 Value2SUsMap &loads, unsigned N) {
1069 LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
1070 dbgs() << "Loading SUnits:\n"; loads.dump());
1071
1072 // Insert all SU's NodeNums into a vector and sort it.
1073 std::vector<unsigned> NodeNums;
1074 NodeNums.reserve(stores.size() + loads.size());
1075 for (const auto &[V, SUs] : stores) {
1076 (void)V;
1077 for (const auto *SU : SUs)
1078 NodeNums.push_back(SU->NodeNum);
1079 }
1080 for (const auto &[V, SUs] : loads) {
1081 (void)V;
1082 for (const auto *SU : SUs)
1083 NodeNums.push_back(SU->NodeNum);
1084 }
1085 llvm::sort(NodeNums);
1086
1087 // The N last elements in NodeNums will be removed, and the SU with
1088 // the lowest NodeNum of them will become the new BarrierChain to
1089 // let the not yet seen SUs have a dependency to the removed SUs.
1090 assert(N <= NodeNums.size());
1091 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1092 if (BarrierChain) {
1093 // The aliasing and non-aliasing maps reduce independently of each
1094 // other, but share a common BarrierChain. Check if the
1095 // newBarrierChain is above the former one. If it is not, it may
1096 // introduce a loop to use newBarrierChain, so keep the old one.
1097 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1098 BarrierChain->addPredBarrier(newBarrierChain);
1099 BarrierChain = newBarrierChain;
1100 LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1101 << BarrierChain->NodeNum << ").\n");
1102 }
1103 else
1104 LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1105 << BarrierChain->NodeNum << ").\n");
1106 }
1107 else
1108 BarrierChain = newBarrierChain;
1109
1110 insertBarrierChain(stores);
1111 insertBarrierChain(loads);
1112
1113 LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1114 dbgs() << "Loading SUnits:\n"; loads.dump());
1115}
1116
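/// Sets or clears kill flags on the register reads of MI based on whether the
/// register is still available in LiveRegs, optionally adding the read
/// registers to the live set.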
1117static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
1118 MachineInstr &MI, bool addToLiveRegs) {
1119 for (MachineOperand &MO : MI.operands()) {
1120 if (!MO.isReg() || !MO.readsReg())
1121 continue;
1122 Register Reg = MO.getReg();
1123 if (!Reg)
1124 continue;
1125
1126 // Things that are available after the instruction are killed by it.
1127 bool IsKill = LiveRegs.available(Reg);
1128
1129 // Exception: Do not kill reserved registers
1130 MO.setIsKill(IsKill && !MRI.isReserved(Reg));
1131 if (addToLiveRegs)
1132 LiveRegs.addReg(Reg);
1133 }
1134}
1135
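/// Recomputes kill flags for all instructions in MBB by walking the block
/// bottom-up with a live register unit set seeded from the block's live-outs.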
1136void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1137 LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1138
1139 LiveRegs.init(*TRI);
1140 LiveRegs.addLiveOuts(MBB);
1141
1142 // Examine block from end to start...
1143 for (MachineInstr &MI : llvm::reverse(MBB)) {
1144 if (MI.isDebugOrPseudoInstr())
1145 continue;
1146
1147 // Update liveness. Registers that are defed but not used in this
1148 // instruction are now dead. Mark register and all subregs as they
1149 // are completely defined.
1150 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1151 const MachineOperand &MO = *O;
1152 if (MO.isReg()) {
1153 if (!MO.isDef())
1154 continue;
1155 Register Reg = MO.getReg();
1156 if (!Reg)
1157 continue;
1158 LiveRegs.removeReg(Reg);
1159 } else if (MO.isRegMask()) {
1160 LiveRegs.removeRegsNotPreserved(MO.getRegMask());
1161 }
1162 }
1163
1164 // If there is a bundle header fix it up first.
1165 if (!MI.isBundled()) {
1166 toggleKills(MRI, LiveRegs, MI, true);
1167 } else {
1168 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1169 if (MI.isBundle())
1170 toggleKills(MRI, LiveRegs, MI, false);
1171
1172 // Some targets make the (questionable) assumption that the instructions
1173 // inside the bundle are ordered and consequently only the last use of
1174 // a register inside the bundle can kill it.
1175 MachineBasicBlock::instr_iterator I = std::next(Bundle);
1176 while (I->isBundledWithSucc())
1177 ++I;
1178 do {
1179 if (!I->isDebugOrPseudoInstr())
1180 toggleKills(MRI, LiveRegs, *I, true);
1181 --I;
1182 } while (I != Bundle);
1183 }
1184 }
1185}
1186
1187void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1189 dumpNodeName(SU);
1190 if (SchedPrintCycles)
1191 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1192 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1193 dbgs() << ": ";
1194 SU.getInstr()->dump();
1195#endif
1196}
1197
1198void ScheduleDAGInstrs::dump() const {
1199#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1200 if (EntrySU.getInstr() != nullptr)
1201 dumpNodeAll(EntrySU);
1202 for (const SUnit &SU : SUnits)
1203 dumpNodeAll(SU);
1204 if (ExitSU.getInstr() != nullptr)
1205 dumpNodeAll(ExitSU);
1206#endif
1207}
1208
1209std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1210 std::string s;
1211 raw_string_ostream oss(s);
1212 if (SU == &EntrySU)
1213 oss << "<entry>";
1214 else if (SU == &ExitSU)
1215 oss << "<exit>";
1216 else
1217 SU->getInstr()->print(oss, /*IsStandalone=*/true);
1218 return s;
1219}
1220
1221/// Return the basic block label. It is not necessarily unique because a block
1222/// may contain multiple scheduling regions. But it is fine for visualization.
1223std::string ScheduleDAGInstrs::getDAGName() const {
1224 return "dag." + BB->getFullName();
1225}
1226
1227bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1228 return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1229}
1230
1231bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1232 if (SuccSU != &ExitSU) {
1233 // Do not use WillCreateCycle, it assumes SD scheduling.
1234 // If Pred is reachable from Succ, then the edge creates a cycle.
1235 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1236 return false;
1237 Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1238 }
1239 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1240 // Return true regardless of whether a new edge needed to be inserted.
1241 return true;
1242}
1243
1244//===----------------------------------------------------------------------===//
1245// SchedDFSResult Implementation
1246//===----------------------------------------------------------------------===//
1247
1248namespace llvm {
1249
1250/// Internal state used to compute SchedDFSResult.
1251class SchedDFSImpl {
1252 SchedDFSResult &R;
1253
1254 /// Join DAG nodes into equivalence classes by their subtree.
1255 IntEqClasses SubtreeClasses;
1256 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1257 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1258
1259 struct RootData {
1260 unsigned NodeID;
1261 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1262 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1263 /// children.
1264
1265 RootData(unsigned id): NodeID(id),
1266 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1267
1268 unsigned getSparseSetIndex() const { return NodeID; }
1269 };
1270
1271 SparseSet<RootData> RootSet;
1272
1273public:
1274 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1275 RootSet.setUniverse(R.DFSNodeData.size());
1276 }
1277
1278 /// Returns true if this node has been visited by the DFS traversal.
1279 ///
1280 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1281 /// ID. Later, SubtreeID is updated but remains valid.
1282 bool isVisited(const SUnit *SU) const {
1283 return R.DFSNodeData[SU->NodeNum].SubtreeID
1284 != SchedDFSResult::InvalidSubtreeID;
1285 }
1286
1287 /// Initializes this node's instruction count. We don't need to flag the node
1288 /// visited until visitPostorder because the DAG cannot have cycles.
1289 void visitPreorder(const SUnit *SU) {
1290 R.DFSNodeData[SU->NodeNum].InstrCount =
1291 SU->getInstr()->isTransient() ? 0 : 1;
1292 }
1293
1294 /// Called once for each node after all predecessors are visited. Revisit this
1295 /// node's predecessors and potentially join them now that we know the ILP of
1296 /// the other predecessors.
1297 void visitPostorderNode(const SUnit *SU) {
1298 // Mark this node as the root of a subtree. It may be joined with its
1299 // successors later.
1300 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1301 RootData RData(SU->NodeNum);
1302 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1303
1304 // If any predecessors are still in their own subtree, they either cannot be
1305 // joined or are large enough to remain separate. If this parent node's
1306 // total instruction count is not greater than a child subtree by at least
1307 // the subtree limit, then try to join it now since splitting subtrees is
1308 // only useful if multiple high-pressure paths are possible.
1309 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1310 for (const SDep &PredDep : SU->Preds) {
1311 if (PredDep.getKind() != SDep::Data)
1312 continue;
1313 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1314 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1315 joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1316
1317 // Either link or merge the TreeData entry from the child to the parent.
1318 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1319 // If the predecessor's parent is invalid, this is a tree edge and the
1320 // current node is the parent.
1321 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1322 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1323 }
1324 else if (RootSet.count(PredNum)) {
1325 // The predecessor is not a root, but is still in the root set. This
1326 // must be the new parent that it was just joined to. Note that
1327 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1328 // set to the original parent.
1329 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1330 RootSet.erase(PredNum);
1331 }
1332 }
1333 RootSet[SU->NodeNum] = RData;
1334 }
1335
1336 /// Called once for each tree edge after calling visitPostOrderNode on
1337 /// the predecessor. Increment the parent node's instruction count and
1338 /// preemptively join this subtree to its parent's if it is small enough.
1339 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1340 R.DFSNodeData[Succ->NodeNum].InstrCount
1341 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1342 joinPredSubtree(PredDep, Succ);
1343 }
1344
1345 /// Adds a connection for cross edges.
1346 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1347 ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
1348 }
1349
1350 /// Sets each node's subtree ID to the representative ID and record
1351 /// connections between trees.
1352 void finalize() {
1353 SubtreeClasses.compress();
1354 R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1355 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1356 && "number of roots should match trees");
1357 for (const RootData &Root : RootSet) {
1358 unsigned TreeID = SubtreeClasses[Root.NodeID];
1359 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1360 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1361 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1362 // Note that SubInstrCount may be greater than InstrCount if we joined
1363 // subtrees across a cross edge. InstrCount will be attributed to the
1364 // original parent, while SubInstrCount will be attributed to the joined
1365 // parent.
1366 }
1367 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1368 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1369 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1370 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1371 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1372 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1373 << R.DFSNodeData[Idx].SubtreeID << '\n');
1374 }
1375 for (const auto &[Pred, Succ] : ConnectionPairs) {
1376 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1377 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1378 if (PredTree == SuccTree)
1379 continue;
1380 unsigned Depth = Pred->getDepth();
1381 addConnection(PredTree, SuccTree, Depth);
1382 addConnection(SuccTree, PredTree, Depth);
1383 }
1384 }
1385
1386protected:
1387 /// Joins the predecessor subtree with the successor that is its DFS parent.
1388 /// Applies some heuristics before joining.
1389 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1390 bool CheckLimit = true) {
1391 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1392
1393 // Check if the predecessor is already joined.
1394 const SUnit *PredSU = PredDep.getSUnit();
1395 unsigned PredNum = PredSU->NodeNum;
1396 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1397 return false;
1398
1399 // Four is the magic number of successors before a node is considered a
1400 // pinch point.
1401 unsigned NumDataSucs = 0;
1402 for (const SDep &SuccDep : PredSU->Succs) {
1403 if (SuccDep.getKind() == SDep::Data) {
1404 if (++NumDataSucs >= 4)
1405 return false;
1406 }
1407 }
1408 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1409 return false;
1410 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1411 SubtreeClasses.join(Succ->NodeNum, PredNum);
1412 return true;
1413 }
1414
1415 /// Called by finalize() to record a connection between trees.
1416 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1417 if (!Depth)
1418 return;
1419
1420 do {
1421 SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1422 R.SubtreeConnections[FromTree];
1423 for (SchedDFSResult::Connection &C : Connections) {
1424 if (C.TreeID == ToTree) {
1425 C.Level = std::max(C.Level, Depth);
1426 return;
1427 }
1428 }
1429 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1430 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1431 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1432 }
1433};
1434
1435} // end namespace llvm
1436
1437namespace {
1438
1439/// Manage the stack used by a reverse depth-first search over the DAG.
1440class SchedDAGReverseDFS {
1441 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1442
1443public:
1444 bool isComplete() const { return DFSStack.empty(); }
1445
1446 void follow(const SUnit *SU) {
1447 DFSStack.emplace_back(SU, SU->Preds.begin());
1448 }
1449 void advance() { ++DFSStack.back().second; }
1450
1451 const SDep *backtrack() {
1452 DFSStack.pop_back();
1453 return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1454 }
1455
1456 const SUnit *getCurr() const { return DFSStack.back().first; }
1457
1458 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1459
1460 SUnit::const_pred_iterator getPredEnd() const {
1461 return getCurr()->Preds.end();
1462 }
1463};
1464
1465} // end anonymous namespace
1466
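/// Returns true if SU has a data successor that is not a boundary node.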
1467static bool hasDataSucc(const SUnit *SU) {
1468 for (const SDep &SuccDep : SU->Succs) {
1469 if (SuccDep.getKind() == SDep::Data &&
1470 !SuccDep.getSUnit()->isBoundaryNode())
1471 return true;
1472 }
1473 return false;
1474}
1475
1476/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1477/// search from this root.
1478void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1479 if (!IsBottomUp)
1480 llvm_unreachable("Top-down ILP metric is unimplemented");
1481
1482 SchedDFSImpl Impl(*this);
1483 for (const SUnit &SU : SUnits) {
1484 if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1485 continue;
1486
1487 SchedDAGReverseDFS DFS;
1488 Impl.visitPreorder(&SU);
1489 DFS.follow(&SU);
1490 while (true) {
1491 // Traverse the leftmost path as far as possible.
1492 while (DFS.getPred() != DFS.getPredEnd()) {
1493 const SDep &PredDep = *DFS.getPred();
1494 DFS.advance();
1495 // Ignore non-data edges.
1496 if (PredDep.getKind() != SDep::Data
1497 || PredDep.getSUnit()->isBoundaryNode()) {
1498 continue;
1499 }
1500 // An already visited edge is a cross edge, assuming an acyclic DAG.
1501 if (Impl.isVisited(PredDep.getSUnit())) {
1502 Impl.visitCrossEdge(PredDep, DFS.getCurr());
1503 continue;
1504 }
1505 Impl.visitPreorder(PredDep.getSUnit());
1506 DFS.follow(PredDep.getSUnit());
1507 }
1508 // Visit the top of the stack in postorder and backtrack.
1509 const SUnit *Child = DFS.getCurr();
1510 const SDep *PredDep = DFS.backtrack();
1511 Impl.visitPostorderNode(Child);
1512 if (PredDep)
1513 Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1514 if (DFS.isComplete())
1515 break;
1516 }
1517 }
1518 Impl.finalize();
1519}
1520
1521/// The root of the given SubtreeID was just scheduled. For all subtrees
1522/// connected to this tree, record the depth of the connection so that the
1523/// nearest connected subtrees can be prioritized.
1524void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1525 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1526 SubtreeConnectLevels[C.TreeID] =
1527 std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1528 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1529 << SubtreeConnectLevels[C.TreeID] << '\n');
1530 }
1531}
1532
1533#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1534LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1535 OS << InstrCount << " / " << Length << " = ";
1536 if (!Length)
1537 OS << "BADILP";
1538 else
1539 OS << format("%g", ((double)InstrCount / Length));
1540}
1541
1542LLVM_DUMP_METHOD void ILPValue::dump() const {
1543 dbgs() << *this << '\n';
1544}
1545
1546namespace llvm {
1547
1548LLVM_ATTRIBUTE_UNUSED
1549raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1550 Val.print(OS);
1551 return OS;
1552}
1553
1554} // end namespace llvm
1555
1556#endif
unsigned SubReg
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
bool End
Definition: ELF_riscv.cpp:480
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:235
static Register UseReg(const MachineOperand &MO)
hexagon widen Hexagon Store false hexagon widen loads
hexagon widen stores
IRTranslator LLVM IR MI
Equivalence classes for small integers.
A common definition of LaneBitmask for use in TableGen and CodeGen.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
static bool hasDataSucc(const SUnit *SU)
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::desc("Enable use of AA during MI DAG construction"))
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
static unsigned getReductionSize()
static void dumpSUList(const ScheduleDAGInstrs::SUList &L)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< bool > SchedPrintCycles("sched-print-cycles", cl::Hidden, cl::init(false), cl::desc("Report top/bottom cycles when dumping SUnit instances"))
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
void clear()
Clears map from all contents.
void clearList(ValueType V)
Clears the list of SUs mapped to V.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
void compress()
compress - Compress equivalence classes by numbering them 0 .
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
Definition: LiveRegUnits.h:73
void addReg(MCPhysReg Reg)
Adds register units covered by physical register Reg.
Definition: LiveRegUnits.h:86
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeRegsNotPreserved(const uint32_t *RegMask)
Removes register units not preserved by the regmask RegMask.
void removeReg(MCPhysReg Reg)
Removes all register units covered by physical register Reg.
Definition: LiveRegUnits.h:102
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
bool hasImplicitUseOfPhysReg(MCRegister Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:587
bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:32
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
Instructions::iterator instr_iterator
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
iterator_range< succ_iterator > successors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasTailCall() const
Returns true if the function contains a tail call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Representation of each machine instruction.
Definition: MachineInstr.h:71
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:974
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:958
bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
iterator_range< filtered_mop_iterator > all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
Definition: MachineInstr.h:774
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:574
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
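These MachineInstr predicates are what the chain-dependence logic consults when deciding whether two memory operations must stay ordered. A hedged sketch follows; needsChainEdge is a hypothetical helper, and the real logic in this file is considerably more involved.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"

bool needsChainEdge(llvm::BatchAAResults *AA, const llvm::MachineInstr &A,
                    const llvm::MachineInstr &B, bool UseTBAA) {
  if (A.isCall() || A.isBarrier())
    return true;                      // conservatively order around calls/barriers
  return A.mayAlias(AA, B, UseTBAA);  // alias query; a null AA answers conservatively
}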
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
typename VectorType::iterator iterator
Definition: MapVector.h:49
iterator end()
Definition: MapVector.h:71
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:98
iterator find(const KeyT &Key)
Definition: MapVector.h:167
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
iterator begin()
Definition: MapVector.h:69
Array of PressureDiffs.
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void init(unsigned N)
Initialize an array of N PressureDiffs.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
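When buildSchedGraph() is given a RegPressureTracker, it interleaves DAG construction with a bottom-up pressure walk built from exactly these calls. A hedged sketch of one step of that walk (stepPressure is a hypothetical helper):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/RegisterPressure.h"

void stepPressure(llvm::RegPressureTracker &RPTracker) {
  RPTracker.recedeSkipDebugValues();                   // position on a real instruction
  llvm::SmallVector<llvm::VRegMaskOrUnit, 8> LiveUses;
  RPTracker.recede(&LiveUses);                         // step over it, collecting live uses
}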
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ MayAliasMem
Nonvolatile load/store instructions that may alias.
Definition: ScheduleDAG.h:70
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
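The SDep kinds above are the edge labels the DAG builder attaches. A hedged sketch of creating one edge in the style of the physical-register dependence code (addDataEdge is a hypothetical helper; the latency would normally come from TargetSchedModel):

#include "llvm/CodeGen/ScheduleDAG.h"

void addDataEdge(llvm::SUnit *DefSU, llvm::SUnit *UseSU, unsigned Reg,
                 unsigned Latency) {
  llvm::SDep Dep(DefSU, llvm::SDep::Data, Reg);  // true dependence on Reg
  Dep.setLatency(Latency);
  UseSU->addPred(Dep);                           // UseSU now waits on DefSU
}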
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:402
unsigned NumSuccs
Definition: ScheduleDAG.h:273
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:278
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:300
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:267
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:382
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:358
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:301
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:290
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:291
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already present.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
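Once the DAG is built, the per-node Preds/Succs vectors are how schedulers (and the dump routines) traverse it. A small, hedged sketch of such a walk (dumpDataPreds is hypothetical):

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"

void dumpDataPreds(const llvm::SUnit &SU) {
  for (const llvm::SDep &Pred : SU.Preds)
    if (Pred.getKind() == llvm::SDep::Data)
      llvm::dbgs() << "  data edge from SU(" << Pred.getSUnit()->NodeNum
                   << ") latency " << Pred.getLatency() << "\n";
}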
Internal state used to compute SchedDFSResult.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
SchedDFSImpl(SchedDFSResult &r)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
LiveRegUnits LiveRegs
Set of live physical registers for updating kill flags.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not create the necessary dependencies to prevent reordering.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
BatchAAResults * getAAForDep() const
Returns a (possibly null) pointer to the current BatchAAResults.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
RegUnit2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
std::optional< BatchAAResults > AAForDep
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the dependency.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
const MachineFrameInfo & MFI
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
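The ScheduleDAGInstrs members above are driven through a fixed per-region protocol. A condensed, hedged sketch of that sequence as a caller would issue it (scheduleRegion is hypothetical; a concrete subclass supplies the actual schedule() step):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

void scheduleRegion(llvm::ScheduleDAGInstrs &DAG, llvm::MachineBasicBlock *MBB,
                    llvm::MachineBasicBlock::iterator RegionBegin,
                    llvm::MachineBasicBlock::iterator RegionEnd,
                    unsigned NumRegionInstrs, llvm::AAResults *AA) {
  DAG.startBlock(MBB);
  DAG.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  DAG.buildSchedGraph(AA);   // create SUnits and every dependence edge
  // ... a concrete scheduler reorders instructions between RegionBegin/End ...
  DAG.exitRegion();
  DAG.finishBlock();
}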
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:794
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:63
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:580
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
void dumpNodeAll(const SUnit &SU) const
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:194
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:293
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:245
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:160
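Both SparseSet and SparseMultiSet back the register maps above, and both require setUniverse() before any insertion. A minimal, hedged sketch of the idiom with a plain SparseSet keyed by register number (trackUnits is hypothetical):

#include "llvm/ADT/SparseSet.h"

void trackUnits(unsigned NumRegUnits) {
  llvm::SparseSet<unsigned> Live;
  Live.setUniverse(NumRegUnits);  // largest key the set may ever hold
  Live.insert(5u);
  if (Live.count(5u))
    Live.erase(Live.find(5u));    // erase via a valid iterator
}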
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
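These two queries are roughly what getLaneMaskForMO() combines: a sub-register operand only touches the lanes its sub-index covers, otherwise a full-register mask applies. A hedged sketch (laneMaskFor is hypothetical; the in-tree code consults MachineRegisterInfo for the no-subreg case):

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"

llvm::LaneBitmask laneMaskFor(const llvm::TargetRegisterInfo &TRI,
                              const llvm::TargetRegisterClass &RC,
                              unsigned SubIdx) {
  return SubIdx ? TRI.getSubRegIndexLaneMask(SubIdx) : RC.getLaneMask();
}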
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
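TargetSchedModel is where the edge latencies set on SDep values come from. A hedged sketch of the per-edge query (edgeLatency is hypothetical):

#include "llvm/CodeGen/TargetSchedModel.h"

unsigned edgeLatency(const llvm::TargetSchedModel &SchedModel,
                     const llvm::MachineInstr *DefMI, unsigned DefOpIdx,
                     const llvm::MachineInstr *UseMI, unsigned UseOpIdx) {
  // Consults the per-operand machine model when hasInstrSchedModel() is true
  // and falls back to simpler defaults otherwise.
  return SchedModel.computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx);
}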
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
'undef' values are things that do not have specified contents.
Definition: Constants.h:1412
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:74
int getNumOccurrences() const
Definition: CommandLine.h:399
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Length
Definition: DWP.cpp:480
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr chains.
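getUnderlyingObjectsForCodeGen() and isIdentifiedObject() are the queries that decide whether a memory access can be keyed to a distinct IR object in the Stores/Loads maps. A hedged sketch (collectObjects is hypothetical; V would come from a machine memory operand, and the header locations are assumptions):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"

void collectObjects(const llvm::Value *V) {
  llvm::SmallVector<llvm::Value *, 4> Objs;
  if (!llvm::getUnderlyingObjectsForCodeGen(V, Objs))
    return;                                  // unanalyzable access
  for (llvm::Value *Obj : Objs)
    (void)llvm::isIdentifiedObject(Obj);     // distinct, identifiable object?
}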
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
void print(raw_ostream &OS) const
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
constexpr bool any() const
Definition: LaneBitmask.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:66
Record a physical register access.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254
Mapping from virtual register to SUnit including an operand index.
An individual mapping from virtual register number to SUnit.