ScheduleDAGInstrs.cpp
1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/SparseSet.h"
41#include "llvm/Config/llvm-config.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/Type.h"
45#include "llvm/IR/Value.h"
46#include "llvm/MC/LaneBitmask.h"
51#include "llvm/Support/Debug.h"
53#include "llvm/Support/Format.h"
55#include <algorithm>
56#include <cassert>
57#include <iterator>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "machine-scheduler"
64
65static cl::opt<bool>
66 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
67 cl::desc("Enable use of AA during MI DAG construction"));
68
69static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
70 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
71
72static cl::opt<bool>
73 EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
74 cl::desc("Use TargetSchedModel for latency lookup"));
75
76static cl::opt<bool>
77 EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
78 cl::desc("Use InstrItineraryData for latency lookup"));
79
80// Note: the two options below might be used in tuning compile time vs
81// output quality. Setting HugeRegion so large that it will never be
82// reached means best-effort, but may be slow.
83
84// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
85// together hold this many SUs, a reduction of maps will be done.
86static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
87 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
88 "prior to scheduling, at which point a trade-off "
89 "is made to avoid excessive compile time."));
90
91static cl::opt<unsigned> ReductionSize(
92    "dag-maps-reduction-size", cl::Hidden,
93 cl::desc("A huge scheduling region will have maps reduced by this many "
94 "nodes at a time. Defaults to HugeRegion / 2."));
95
96#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
97static cl::opt<bool> SchedPrintCycles(
98    "sched-print-cycles", cl::Hidden, cl::init(false),
99 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
100#endif
101
102static unsigned getReductionSize() {
103 // Always reduce a huge region with half of the elements, except
104 // when user sets this number explicitly.
105 if (ReductionSize.getNumOccurrences() == 0)
106 return HugeRegion / 2;
107 return ReductionSize;
108}
109
110LLVM_DUMP_METHOD static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
112 dbgs() << "{ ";
113 for (const SUnit *SU : L) {
114 dbgs() << "SU(" << SU->NodeNum << ")";
115 if (SU != L.back())
116 dbgs() << ", ";
117 }
118 dbgs() << "}\n";
119#endif
120}
121
122ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
123                                     const MachineLoopInfo *mli,
124 bool RemoveKillFlags)
125 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
126 RemoveKillFlags(RemoveKillFlags),
127 UnknownValue(UndefValue::get(
128 Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
129 DbgValues.clear();
130
131 const TargetSubtargetInfo &ST = mf.getSubtarget();
133}
134
135/// If this machine instr has memory reference information and it can be
136/// tracked to a normal reference to a known object, return the Value
137/// for that object. This function returns false if the memory location is
138/// unknown or may alias anything.
139static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
140                                         const MachineFrameInfo &MFI,
141                                         UnderlyingObjectsVector &Objects,
142                                         const DataLayout &DL) {
143 auto AllMMOsOkay = [&]() {
144 for (const MachineMemOperand *MMO : MI->memoperands()) {
145 // TODO: Figure out whether isAtomic is really necessary (see D57601).
146 if (MMO->isVolatile() || MMO->isAtomic())
147 return false;
148
149 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
150        // Functions that contain tail calls don't have unique PseudoSourceValue
151 // objects. Two PseudoSourceValues might refer to the same or
152 // overlapping locations. The client code calling this function assumes
153 // this is not the case. So return a conservative answer of no known
154 // object.
155 if (MFI.hasTailCall())
156 return false;
157
158 // For now, ignore PseudoSourceValues which may alias LLVM IR values
159 // because the code that uses this function has no way to cope with
160 // such aliases.
161 if (PSV->isAliased(&MFI))
162 return false;
163
164 bool MayAlias = PSV->mayAlias(&MFI);
165 Objects.emplace_back(PSV, MayAlias);
166 } else if (const Value *V = MMO->getValue()) {
167        SmallVector<Value *, 4> Objs;
168        if (!getUnderlyingObjectsForCodeGen(V, Objs))
169 return false;
170
171 for (Value *V : Objs) {
173 Objects.emplace_back(V, true);
174 }
175 } else
176 return false;
177 }
178 return true;
179 };
180
181 if (!AllMMOsOkay()) {
182 Objects.clear();
183 return false;
184 }
185
186 return true;
187}
188
189void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
190  BB = bb;
191}
192
193void ScheduleDAGInstrs::finishBlock() {
194  // Subclasses should no longer refer to the old block.
195 BB = nullptr;
196}
197
198void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
199                                    MachineBasicBlock::iterator begin,
200                                    MachineBasicBlock::iterator end,
201                                    unsigned regioninstrs) {
202  assert(bb == BB && "startBlock should set BB");
203  RegionBegin = begin;
204  RegionEnd = end;
205 NumRegionInstrs = regioninstrs;
206}
207
208void ScheduleDAGInstrs::exitRegion() {
209  // Nothing to do.
210}
211
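// addSchedBarrierDeps() models the region exit: ExitSU stands in for the
// instruction at RegionEnd (if any), and the code below records its register
// uses -- or, for a fall-through/conditional exit, the live-ins of all
// successor blocks -- so that in-region defs of those registers get data
// dependencies to ExitSU.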
212void ScheduleDAGInstrs::addSchedBarrierDeps() {
213  MachineInstr *ExitMI =
214 RegionEnd != BB->end()
216 : nullptr;
217 ExitSU.setInstr(ExitMI);
218 // Add dependencies on the defs and uses of the instruction.
219 if (ExitMI) {
220 const MCInstrDesc &MIDesc = ExitMI->getDesc();
221 for (const MachineOperand &MO : ExitMI->all_uses()) {
222 unsigned OpIdx = MO.getOperandNo();
223 Register Reg = MO.getReg();
224 if (Reg.isPhysical()) {
225 // addPhysRegDataDeps uses the provided operand index to retrieve
226 // the operand use cycle from the scheduling model. If the operand
227 // is "fake" (e.g., an operand of a call instruction used to pass
228 // an argument to the called function.), the scheduling model may not
229 // have an entry for it. If this is the case, pass -1 as operand index,
230 // which will cause addPhysRegDataDeps to add an artificial dependency.
231 // FIXME: Using hasImplicitUseOfPhysReg here is inaccurate as it misses
232 // aliases. When fixing, make sure to update addPhysRegDataDeps, too.
233 bool IsRealUse = OpIdx < MIDesc.getNumOperands() ||
234 MIDesc.hasImplicitUseOfPhysReg(Reg);
235 for (MCRegUnit Unit : TRI->regunits(Reg))
236 Uses.insert(PhysRegSUOper(&ExitSU, IsRealUse ? OpIdx : -1, Unit));
237 } else if (Reg.isVirtual() && MO.readsReg()) {
238        addVRegUseDeps(&ExitSU, OpIdx);
239      }
240 }
241 }
242 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
243 // For others, e.g. fallthrough, conditional branch, assume the exit
244 // uses all the registers that are livein to the successor blocks.
245 for (const MachineBasicBlock *Succ : BB->successors()) {
246 for (const auto &LI : Succ->liveins()) {
247 for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
248 auto [Unit, Mask] = *U;
249 if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
250 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
251 }
252 }
253 }
254 }
255}
256
257/// MO is an operand of SU's instruction that defines a physical register. Adds
258/// data dependencies from SU to any uses of the physical register.
259void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
260 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
261 assert(MO.isDef() && "expect physreg def");
262 Register Reg = MO.getReg();
263
264 // Ask the target if address-backscheduling is desirable, and if so how much.
265 const TargetSubtargetInfo &ST = MF.getSubtarget();
266
267 // Only use any non-zero latency for real defs/uses, in contrast to
268 // "fake" operands added by regalloc.
269 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
270 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
271 !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
272 for (MCRegUnit Unit : TRI->regunits(Reg)) {
273 for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
274 ++I) {
275 SUnit *UseSU = I->SU;
276 if (UseSU == SU)
277 continue;
278
279 // Adjust the dependence latency using operand def/use information,
280 // then allow the target to perform its own adjustments.
281 MachineInstr *UseInstr = nullptr;
282 int UseOpIdx = I->OpIdx;
283 bool ImplicitPseudoUse = false;
284 SDep Dep;
285 if (UseOpIdx < 0) {
286 Dep = SDep(SU, SDep::Artificial);
287 } else {
288 // Set the hasPhysRegDefs only for physreg defs that have a use within
289 // the scheduling region.
290 SU->hasPhysRegDefs = true;
291
292 UseInstr = UseSU->getInstr();
293 Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
294 const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
295        ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
296                            !UseMIDesc.hasImplicitUseOfPhysReg(UseReg);
297
298 Dep = SDep(SU, SDep::Data, UseReg);
299 }
300 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
301        Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
302                                                        UseInstr, UseOpIdx));
303 } else {
304 Dep.setLatency(0);
305 }
306 ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel);
307 UseSU->addPred(Dep);
308 }
309 }
310}
311
312/// Adds register dependencies (data, anti, and output) from this SUnit
313/// to following instructions in the same scheduling region that depend on the
314/// physical register referenced at OperIdx.
315void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
316 MachineInstr *MI = SU->getInstr();
317 MachineOperand &MO = MI->getOperand(OperIdx);
318 Register Reg = MO.getReg();
319 // We do not need to track any dependencies for constant registers.
320 if (MRI.isConstantPhysReg(Reg))
321 return;
322
323 const TargetSubtargetInfo &ST = MF.getSubtarget();
324
325 // Optionally add output and anti dependencies. For anti
326 // dependencies we use a latency of 0 because for a multi-issue
327 // target we want to allow the defining instruction to issue
328 // in the same cycle as the using instruction.
329 // TODO: Using a latency of 1 here for output dependencies assumes
330 // there's no cost for reusing registers.
331 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
332 for (MCRegUnit Unit : TRI->regunits(Reg)) {
333 for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
334 ++I) {
335 SUnit *DefSU = I->SU;
336 if (DefSU == &ExitSU)
337 continue;
338 MachineInstr *DefInstr = DefSU->getInstr();
339 MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
340 if (DefSU != SU &&
341 (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
342 SDep Dep(SU, Kind, DefMO.getReg());
343 if (Kind != SDep::Anti) {
344 Dep.setLatency(
345 SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
346 }
347 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep,
348 &SchedModel);
349 DefSU->addPred(Dep);
350 }
351 }
352 }
353
354 if (MO.isUse()) {
355 SU->hasPhysRegUses = true;
356 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
357 // retrieve the existing SUnits list for this register's uses.
358 // Push this SUnit on the use list.
359 for (MCRegUnit Unit : TRI->regunits(Reg))
360 Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
361 if (RemoveKillFlags)
362 MO.setIsKill(false);
363 } else {
364 addPhysRegDataDeps(SU, OperIdx);
365
366 // Clear previous uses and defs of this register and its subregisters.
367 for (MCRegUnit Unit : TRI->regunits(Reg)) {
368 Uses.eraseAll(Unit);
369 if (!MO.isDead())
370 Defs.eraseAll(Unit);
371 }
372
373 if (MO.isDead() && SU->isCall) {
374 // Calls will not be reordered because of chain dependencies (see
375 // below). Since call operands are dead, calls may continue to be added
376 // to the DefList making dependence checking quadratic in the size of
377 // the block. Instead, we leave only one call at the back of the
378 // DefList.
379 for (MCRegUnit Unit : TRI->regunits(Reg)) {
383 for (bool isBegin = I == B; !isBegin; /* empty */) {
384 isBegin = (--I) == B;
385 if (!I->SU->isCall)
386 break;
387 I = Defs.erase(I);
388 }
389 }
390 }
391
392 // Defs are pushed in the order they are visited and never reordered.
393 for (MCRegUnit Unit : TRI->regunits(Reg))
394 Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
395 }
396}
397
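// Returns the lane mask covered by operand MO. When the register class has no
// disjunct subregisters, or MO carries no subregister index, the whole
// register ("all lanes") is returned so callers can treat it as one unit.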
398LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
399{
400 Register Reg = MO.getReg();
401 // No point in tracking lanemasks if we don't have interesting subregisters.
402 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
403 if (!RC.HasDisjunctSubRegs)
404 return LaneBitmask::getAll();
405
406 unsigned SubReg = MO.getSubReg();
407 if (SubReg == 0)
408 return RC.getLaneMask();
409  return TRI->getSubRegIndexLaneMask(SubReg);
410}
411
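// Returns true if no use overlapping MO's lanes has been recorded in
// CurrentVRegUses for the current region; addVRegDefDeps asserts this for
// operands that are marked dead.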
412bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
413  auto RegUse = CurrentVRegUses.find(MO.getReg());
414 if (RegUse == CurrentVRegUses.end())
415 return true;
416 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
417}
418
419/// Adds register output and data dependencies from this SUnit to instructions
420/// that occur later in the same scheduling region if they read from or write to
421/// the virtual register defined at OperIdx.
422///
423/// TODO: Hoist loop induction variable increments. This has to be
424/// reevaluated. Generally, IV scheduling should be done before coalescing.
425void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
426 MachineInstr *MI = SU->getInstr();
427 MachineOperand &MO = MI->getOperand(OperIdx);
428 Register Reg = MO.getReg();
429
430 LaneBitmask DefLaneMask;
431 LaneBitmask KillLaneMask;
432 if (TrackLaneMasks) {
433 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
434 DefLaneMask = getLaneMaskForMO(MO);
435 // If we have a <read-undef> flag, none of the lane values comes from an
436 // earlier instruction.
437 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
438
439 if (MO.getSubReg() != 0 && MO.isUndef()) {
440 // There may be other subregister defs on the same instruction of the same
441 // register in later operands. The lanes of other defs will now be live
442 // after this instruction, so these should not be treated as killed by the
443 // instruction even though they appear to be killed in this one operand.
444 for (const MachineOperand &OtherMO :
445 llvm::drop_begin(MI->operands(), OperIdx + 1))
446 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
447 KillLaneMask &= ~getLaneMaskForMO(OtherMO);
448 }
449
450 // Clear undef flag, we'll re-add it later once we know which subregister
451 // Def is first.
452 MO.setIsUndef(false);
453 } else {
454 DefLaneMask = LaneBitmask::getAll();
455 KillLaneMask = LaneBitmask::getAll();
456 }
457
458 if (MO.isDead()) {
459 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
460 } else {
461 // Add data dependence to all uses we found so far.
462 const TargetSubtargetInfo &ST = MF.getSubtarget();
463    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
464                                             E = CurrentVRegUses.end(); I != E; /*empty*/) {
465 LaneBitmask LaneMask = I->LaneMask;
466 // Ignore uses of other lanes.
467 if ((LaneMask & KillLaneMask).none()) {
468 ++I;
469 continue;
470 }
471
472 if ((LaneMask & DefLaneMask).any()) {
473 SUnit *UseSU = I->SU;
474 MachineInstr *Use = UseSU->getInstr();
475 SDep Dep(SU, SDep::Data, Reg);
476        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
477                                                        I->OperandIndex));
478 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep,
479 &SchedModel);
480 UseSU->addPred(Dep);
481 }
482
483 LaneMask &= ~KillLaneMask;
484 // If we found a Def for all lanes of this use, remove it from the list.
485 if (LaneMask.any()) {
486 I->LaneMask = LaneMask;
487 ++I;
488 } else
489        I = CurrentVRegUses.erase(I);
490    }
491 }
492
493 // Shortcut: Singly defined vregs do not have output/anti dependencies.
494 if (MRI.hasOneDef(Reg))
495 return;
496
497 // Add output dependence to the next nearest defs of this vreg.
498 //
499 // Unless this definition is dead, the output dependence should be
500 // transitively redundant with antidependencies from this definition's
501 // uses. We're conservative for now until we have a way to guarantee the uses
502 // are not eliminated sometime during scheduling. The output dependence edge
503 // is also useful if output latency exceeds def-use latency.
504 LaneBitmask LaneMask = DefLaneMask;
505 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
506 CurrentVRegDefs.end())) {
507 // Ignore defs for other lanes.
508 if ((V2SU.LaneMask & LaneMask).none())
509 continue;
510 // Add an output dependence.
511 SUnit *DefSU = V2SU.SU;
512 // Ignore additional defs of the same lanes in one instruction. This can
513 // happen because lanemasks are shared for targets with too many
514    // subregisters. We also use some representation tricks/hacks where we
515 // add super-register defs/uses, to imply that although we only access parts
516 // of the reg we care about the full one.
517 if (DefSU == SU)
518 continue;
519 SDep Dep(SU, SDep::Output, Reg);
520 Dep.setLatency(
521 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
522 DefSU->addPred(Dep);
523
524 // Update current definition. This can get tricky if the def was about a
525 // bigger lanemask before. We then have to shrink it and create a new
526 // VReg2SUnit for the non-overlapping part.
527 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
528 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
529 V2SU.SU = SU;
530 V2SU.LaneMask = OverlapMask;
531 if (NonOverlapMask.any())
532 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
533 }
534 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
535 if (LaneMask.any())
536 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
537}
538
539/// Adds a register data dependency if the instruction that defines the
540/// virtual register used at OperIdx is mapped to an SUnit. Add a register
541/// antidependency from this SUnit to instructions that occur later in the same
542/// scheduling region if they write the virtual register.
543///
544/// TODO: Handle ExitSU "uses" properly.
545void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
546 const MachineInstr *MI = SU->getInstr();
547 assert(!MI->isDebugOrPseudoInstr());
548
549 const MachineOperand &MO = MI->getOperand(OperIdx);
550 Register Reg = MO.getReg();
551
552 // Remember the use. Data dependencies will be added when we find the def.
553  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
554                                        : LaneBitmask::getAll();
555  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
556
557 // Add antidependences to the following defs of the vreg.
558 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
559 CurrentVRegDefs.end())) {
560 // Ignore defs for unrelated lanes.
561 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
562 if ((PrevDefLaneMask & LaneMask).none())
563 continue;
564 if (V2SU.SU == SU)
565 continue;
566
567 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
568 }
569}
570
571
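// Adds a memory-order ("chain") edge from SUa to SUb, but only when
// MachineInstr::mayAlias -- queried with the scheduler's AA results and,
// optionally, TBAA -- reports that the two accesses may overlap; accesses
// proven disjoint get no edge at all.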
572void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
573                                           unsigned Latency) {
574 if (SUa->getInstr()->mayAlias(getAAForDep(), *SUb->getInstr(), UseTBAA)) {
575 SDep Dep(SUa, SDep::MayAliasMem);
576 Dep.setLatency(Latency);
577 SUb->addPred(Dep);
578 }
579}
580
581/// Creates an SUnit for each real instruction, numbered in top-down
582/// topological order. The instruction order A < B implies that no edge exists
583/// from B to A.
584///
585/// Map each real instruction to its SUnit.
586///
587/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
588/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
589/// instead of pointers.
590///
591/// MachineScheduler relies on initSUnits numbering the nodes by their order in
592/// the original instruction list.
593void ScheduleDAGInstrs::initSUnits() {
594  // We'll be allocating one SUnit for each real instruction in the region,
595 // which is contained within a basic block.
596 SUnits.reserve(NumRegionInstrs);
597
598  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
599    if (MI.isDebugOrPseudoInstr())
600 continue;
601
602 SUnit *SU = newSUnit(&MI);
603 MISUnitMap[&MI] = SU;
604
605 SU->isCall = MI.isCall();
606 SU->isCommutable = MI.isCommutable();
607
608 // Assign the Latency field of SU using target-provided information.
609 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
610
611 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
612 //
613 // Reserved resources block an instruction from issuing and stall the
614 // entire pipeline. These are identified by BufferSize=0.
615 //
616 // Unbuffered resources prevent execution of subsequent instructions that
617 // require the same resources. This is used for in-order execution pipelines
618 // within an out-of-order core. These are identified by BufferSize=1.
620 const MCSchedClassDesc *SC = getSchedClass(SU);
621 for (const MCWriteProcResEntry &PRE :
624 switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
625 case 0:
626 SU->hasReservedResource = true;
627 break;
628 case 1:
629 SU->isUnbuffered = true;
630 break;
631 default:
632 break;
633 }
634 }
635 }
636 }
637}
638
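// Value2SUsMap groups the SUnits of already-visited loads or stores by the
// underlying IR Value (or PseudoSourceValue) they access. A newly visited
// memory access then only needs chain edges to the lists mapped from the
// Values it may touch, which keeps the number of edges manageable.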
639class ScheduleDAGInstrs::Value2SUsMap
640    : public SmallMapVector<ValueType, SUList, 4> {
641 /// Current total number of SUs in map.
642 unsigned NumNodes = 0;
643
644 /// 1 for loads, 0 for stores. (see comment in SUList)
645 unsigned TrueMemOrderLatency;
646
647public:
648 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
649
650 /// To keep NumNodes up to date, insert() is used instead of
651 /// this operator w/ push_back().
652  ValueType &operator[](const SUList &Key) {
653    llvm_unreachable("Don't use. Use insert() instead."); };
654
655 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
656 /// reduce().
657 void inline insert(SUnit *SU, ValueType V) {
658 MapVector::operator[](V).push_back(SU);
659 NumNodes++;
660 }
661
662 /// Clears the list of SUs mapped to V.
663 void inline clearList(ValueType V) {
664 iterator Itr = find(V);
665 if (Itr != end()) {
666 assert(NumNodes >= Itr->second.size());
667 NumNodes -= Itr->second.size();
668
669 Itr->second.clear();
670 }
671 }
672
673 /// Clears map from all contents.
674 void clear() {
675    MapVector::clear();
676    NumNodes = 0;
677 }
678
679 unsigned inline size() const { return NumNodes; }
680
681 /// Counts the number of SUs in this map after a reduction.
682  void reComputeSize() {
683    NumNodes = 0;
684 for (auto &I : *this)
685 NumNodes += I.second.size();
686 }
687
688 unsigned inline getTrueMemOrderLatency() const {
689 return TrueMemOrderLatency;
690 }
691
692 void dump();
693};
694
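// Adds chain dependencies from SU to every SUnit currently held in the map,
// regardless of the underlying Value; used for unanalyzable accesses.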
695void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
696                                             Value2SUsMap &Val2SUsMap) {
697 for (auto &I : Val2SUsMap)
698 addChainDependencies(SU, I.second,
699 Val2SUsMap.getTrueMemOrderLatency());
700}
701
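// Adds chain dependencies from SU only to the SUnits mapped to the given
// Value V, if any such list exists.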
702void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
703                                             Value2SUsMap &Val2SUsMap,
704 ValueType V) {
705 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
706 if (Itr != Val2SUsMap.end())
707 addChainDependencies(SU, Itr->second,
708 Val2SUsMap.getTrueMemOrderLatency());
709}
710
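// Makes the current BarrierChain a predecessor of every SU recorded in the
// map and then empties the map; called when a global memory object becomes
// the new barrier.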
711void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
712  assert(BarrierChain != nullptr);
713
714 for (auto &[V, SUs] : map) {
715 (void)V;
716 for (auto *SU : SUs)
717 SU->addPredBarrier(BarrierChain);
718 }
719 map.clear();
720}
721
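// Reduction variant: only SUs strictly below the (possibly new) BarrierChain
// get it as a predecessor, and those entries are dropped from the map, while
// entries at or above the barrier are kept.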
722void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
723  assert(BarrierChain != nullptr);
724
725 // Go through all lists of SUs.
726 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
727 Value2SUsMap::iterator CurrItr = I++;
728 SUList &sus = CurrItr->second;
729 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
730 for (; SUItr != SUEE; ++SUItr) {
731 // Stop on BarrierChain or any instruction above it.
732 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
733 break;
734
735 (*SUItr)->addPredBarrier(BarrierChain);
736 }
737
738 // Remove also the BarrierChain from list if present.
739 if (SUItr != SUEE && *SUItr == BarrierChain)
740 SUItr++;
741
742 // Remove all SUs that are now successors of BarrierChain.
743 if (SUItr != sus.begin())
744 sus.erase(sus.begin(), SUItr);
745 }
746
747 // Remove all entries with empty su lists.
748 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
749 return (mapEntry.second.empty()); });
750
751 // Recompute the size of the map (NumNodes).
752 map.reComputeSize();
753}
754
755void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
756                                        RegPressureTracker *RPTracker,
757 PressureDiffs *PDiffs,
758 LiveIntervals *LIS,
759 bool TrackLaneMasks) {
760 const TargetSubtargetInfo &ST = MF.getSubtarget();
761  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? (bool)EnableAASchedMI
762                                                        : ST.useAA();
763 if (UseAA && AA)
764 AAForDep.emplace(*AA);
765
766 BarrierChain = nullptr;
767
768 this->TrackLaneMasks = TrackLaneMasks;
769 MISUnitMap.clear();
770  ScheduleDAG::clearDAG();
771
772 // Create an SUnit for each real instruction.
773 initSUnits();
774
775 if (PDiffs)
776 PDiffs->init(SUnits.size());
777
778 // We build scheduling units by walking a block's instruction list
779 // from bottom to top.
780
781  // Each MI's memory operand(s) is analyzed to a list of underlying
782 // objects. The SU is then inserted in the SUList(s) mapped from the
783 // Value(s). Each Value thus gets mapped to lists of SUs depending
784 // on it, stores and loads kept separately. Two SUs are trivially
785 // non-aliasing if they both depend on only identified Values and do
786 // not share any common Value.
787 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
788
789 // Certain memory accesses are known to not alias any SU in Stores
790 // or Loads, and have therefore their own 'NonAlias'
791  // domain. E.g. spill / reload instructions never alias LLVM IR
792 // Values. It would be nice to assume that this type of memory
793 // accesses always have a proper memory operand modelling, and are
794 // therefore never unanalyzable, but this is conservatively not
795 // done.
796 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
797
798 // Track all instructions that may raise floating-point exceptions.
799  // These do not depend on one another (or normal loads or stores), but
800 // must not be rescheduled across global barriers. Note that we don't
801 // really need a "map" here since we don't track those MIs by value;
802 // using the same Value2SUsMap data type here is simply a matter of
803 // convenience.
804 Value2SUsMap FPExceptions;
805
806 // Remove any stale debug info; sometimes BuildSchedGraph is called again
807 // without emitting the info from the previous call.
808 DbgValues.clear();
809 FirstDbgValue = nullptr;
810
811 assert(Defs.empty() && Uses.empty() &&
812 "Only BuildGraph should update Defs/Uses");
815
816 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
817 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
818 unsigned NumVirtRegs = MRI.getNumVirtRegs();
819 CurrentVRegDefs.setUniverse(NumVirtRegs);
820 CurrentVRegUses.setUniverse(NumVirtRegs);
821
822 // Model data dependencies between instructions being scheduled and the
823 // ExitSU.
824  addSchedBarrierDeps();
825
826 // Walk the list of instructions, from bottom moving up.
827 MachineInstr *DbgMI = nullptr;
828  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
829       MII != MIE; --MII) {
830 MachineInstr &MI = *std::prev(MII);
831 if (DbgMI) {
832 DbgValues.emplace_back(DbgMI, &MI);
833 DbgMI = nullptr;
834 }
835
836 if (MI.isDebugValue() || MI.isDebugPHI()) {
837 DbgMI = &MI;
838 continue;
839 }
840
841 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
842 continue;
843
844 SUnit *SU = MISUnitMap[&MI];
845 assert(SU && "No SUnit mapped to this MI");
846
847 if (RPTracker) {
848 RegisterOperands RegOpers;
849 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
850 if (TrackLaneMasks) {
851 SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
852 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
853 }
854 if (PDiffs != nullptr)
855 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
856
857 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
858 RPTracker->recedeSkipDebugValues();
859 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
860 RPTracker->recede(RegOpers);
861 }
862
863 assert(
864 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
865 "Cannot schedule terminators or labels!");
866
867 // Add register-based dependencies (data, anti, and output).
868 // For some instructions (calls, returns, inline-asm, etc.) there can
869 // be explicit uses and implicit defs, in which case the use will appear
870 // on the operand list before the def. Do two passes over the operand
871 // list to make sure that defs are processed before any uses.
872 bool HasVRegDef = false;
873 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
874 const MachineOperand &MO = MI.getOperand(j);
875 if (!MO.isReg() || !MO.isDef())
876 continue;
877 Register Reg = MO.getReg();
878 if (Reg.isPhysical()) {
879 addPhysRegDeps(SU, j);
880 } else if (Reg.isVirtual()) {
881 HasVRegDef = true;
882 addVRegDefDeps(SU, j);
883 }
884 }
885 // Now process all uses.
886 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
887 const MachineOperand &MO = MI.getOperand(j);
888 // Only look at use operands.
889 // We do not need to check for MO.readsReg() here because subsequent
890 // subregister defs will get output dependence edges and need no
891 // additional use dependencies.
892 if (!MO.isReg() || !MO.isUse())
893 continue;
894 Register Reg = MO.getReg();
895 if (Reg.isPhysical()) {
896 addPhysRegDeps(SU, j);
897 } else if (Reg.isVirtual() && MO.readsReg()) {
898 addVRegUseDeps(SU, j);
899 }
900 }
901
902 // If we haven't seen any uses in this scheduling region, create a
903 // dependence edge to ExitSU to model the live-out latency. This is required
904 // for vreg defs with no in-region use, and prefetches with no vreg def.
905 //
906 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
907 // check currently relies on being called before adding chain deps.
908 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
909 SDep Dep(SU, SDep::Artificial);
910 Dep.setLatency(SU->Latency - 1);
911 ExitSU.addPred(Dep);
912 }
913
914 // Add memory dependencies (Note: isStoreToStackSlot and
915    // isLoadFromStackSlot are not usable after stack slots are lowered to
916 // actual addresses).
917
918 const TargetInstrInfo *TII = ST.getInstrInfo();
919 // This is a barrier event that acts as a pivotal node in the DAG.
920 if (TII->isGlobalMemoryObject(&MI)) {
921
922 // Become the barrier chain.
923 if (BarrierChain)
924        BarrierChain->addPredBarrier(SU);
925      BarrierChain = SU;
926
927 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
928 << BarrierChain->NodeNum << ").\n");
929
930 // Add dependencies against everything below it and clear maps.
931 addBarrierChain(Stores);
932 addBarrierChain(Loads);
933 addBarrierChain(NonAliasStores);
934 addBarrierChain(NonAliasLoads);
935 addBarrierChain(FPExceptions);
936
937 continue;
938 }
939
940 // Instructions that may raise FP exceptions may not be moved
941 // across any global barriers.
942 if (MI.mayRaiseFPException()) {
943 if (BarrierChain)
944        SU->addPredBarrier(BarrierChain);
945
946 FPExceptions.insert(SU, UnknownValue);
947
948 if (FPExceptions.size() >= HugeRegion) {
949 LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n");
950 Value2SUsMap empty;
951 reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
952 }
953 }
954
955 // If it's not a store or a variant load, we're done.
956 if (!MI.mayStore() &&
957 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
958 continue;
959
960    // Always add a dependency edge to BarrierChain if present.
961    if (BarrierChain)
962      SU->addPredBarrier(BarrierChain);
963
964 // Find the underlying objects for MI. The Objs vector is either
965 // empty, or filled with the Values of memory locations which this
966 // SU depends on.
967    UnderlyingObjectsVector Objs;
968    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
969 MF.getDataLayout());
970
971 if (MI.mayStore()) {
972 if (!ObjsFound) {
973 // An unknown store depends on all stores and loads.
974 addChainDependencies(SU, Stores);
975 addChainDependencies(SU, NonAliasStores);
976 addChainDependencies(SU, Loads);
977 addChainDependencies(SU, NonAliasLoads);
978
979 // Map this store to 'UnknownValue'.
980 Stores.insert(SU, UnknownValue);
981 } else {
982 // Add precise dependencies against all previously seen memory
983 // accesses mapped to the same Value(s).
984 for (const UnderlyingObject &UnderlObj : Objs) {
985 ValueType V = UnderlObj.getValue();
986 bool ThisMayAlias = UnderlObj.mayAlias();
987
988 // Add dependencies to previous stores and loads mapped to V.
989 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
990 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
991 }
992 // Update the store map after all chains have been added to avoid adding
993        // a self-loop edge if multiple underlying objects are present.
994 for (const UnderlyingObject &UnderlObj : Objs) {
995 ValueType V = UnderlObj.getValue();
996 bool ThisMayAlias = UnderlObj.mayAlias();
997
998 // Map this store to V.
999 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
1000 }
1001 // The store may have dependencies to unanalyzable loads and
1002 // stores.
1003        addChainDependencies(SU, Loads, UnknownValue);
1004        addChainDependencies(SU, Stores, UnknownValue);
1005      }
1006 } else { // SU is a load.
1007 if (!ObjsFound) {
1008 // An unknown load depends on all stores.
1009 addChainDependencies(SU, Stores);
1010 addChainDependencies(SU, NonAliasStores);
1011
1012 Loads.insert(SU, UnknownValue);
1013 } else {
1014 for (const UnderlyingObject &UnderlObj : Objs) {
1015 ValueType V = UnderlObj.getValue();
1016 bool ThisMayAlias = UnderlObj.mayAlias();
1017
1018 // Add precise dependencies against all previously seen stores
1019 // mapping to the same Value(s).
1020 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
1021
1022 // Map this load to V.
1023 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1024 }
1025 // The load may have dependencies to unanalyzable stores.
1026        addChainDependencies(SU, Stores, UnknownValue);
1027      }
1028 }
1029
1030 // Reduce maps if they grow huge.
1031 if (Stores.size() + Loads.size() >= HugeRegion) {
1032 LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n");
1033 reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
1034 }
1035 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1036 LLVM_DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n");
1037 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
1038 }
1039 }
1040
1041 if (DbgMI)
1042 FirstDbgValue = DbgMI;
1043
1044 Defs.clear();
1045 Uses.clear();
1046  CurrentVRegDefs.clear();
1047  CurrentVRegUses.clear();
1048
1049 Topo.MarkDirty();
1050}
1051
1052static raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue *PSV) {
1053  PSV->printCustom(OS);
1054 return OS;
1055}
1056
1057void ScheduleDAGInstrs::Value2SUsMap::dump() {
1058  for (const auto &[ValType, SUs] : *this) {
1059 if (isa<const Value *>(ValType)) {
1060 const Value *V = cast<const Value *>(ValType);
1061 if (isa<UndefValue>(V))
1062 dbgs() << "Unknown";
1063 else
1064 V->printAsOperand(dbgs());
1065 } else if (isa<const PseudoSourceValue *>(ValType))
1066 dbgs() << cast<const PseudoSourceValue *>(ValType);
1067 else
1068 llvm_unreachable("Unknown Value type.");
1069
1070 dbgs() << " : ";
1071 dumpSUList(SUs);
1072 }
1073}
1074
1075void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1076                                              Value2SUsMap &loads, unsigned N) {
1077 LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
1078 dbgs() << "Loading SUnits:\n"; loads.dump());
1079
1080 // Insert all SU's NodeNums into a vector and sort it.
1081 std::vector<unsigned> NodeNums;
1082 NodeNums.reserve(stores.size() + loads.size());
1083 for (const auto &[V, SUs] : stores) {
1084 (void)V;
1085 for (const auto *SU : SUs)
1086 NodeNums.push_back(SU->NodeNum);
1087 }
1088 for (const auto &[V, SUs] : loads) {
1089 (void)V;
1090 for (const auto *SU : SUs)
1091 NodeNums.push_back(SU->NodeNum);
1092 }
1093 llvm::sort(NodeNums);
1094
1095 // The N last elements in NodeNums will be removed, and the SU with
1096 // the lowest NodeNum of them will become the new BarrierChain to
1097 // let the not yet seen SUs have a dependency to the removed SUs.
1098 assert(N <= NodeNums.size());
1099 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1100 if (BarrierChain) {
1101 // The aliasing and non-aliasing maps reduce independently of each
1102 // other, but share a common BarrierChain. Check if the
1103 // newBarrierChain is above the former one. If it is not, it may
1104 // introduce a loop to use newBarrierChain, so keep the old one.
1105 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1106 BarrierChain->addPredBarrier(newBarrierChain);
1107 BarrierChain = newBarrierChain;
1108 LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1109 << BarrierChain->NodeNum << ").\n");
1110 }
1111 else
1112 LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1113 << BarrierChain->NodeNum << ").\n");
1114 }
1115 else
1116 BarrierChain = newBarrierChain;
1117
1118  insertBarrierChain(stores);
1119  insertBarrierChain(loads);
1120
1121 LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1122 dbgs() << "Loading SUnits:\n"; loads.dump());
1123}
1124
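// Helper for fixupKills: for every register MI reads, the kill flag is set
// exactly when the register is not live after MI (and is not reserved), and
// the register is then added to the live set so earlier instructions see it
// as live.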
1125static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
1126 MachineInstr &MI, bool addToLiveRegs) {
1127 for (MachineOperand &MO : MI.operands()) {
1128 if (!MO.isReg() || !MO.readsReg())
1129 continue;
1130 Register Reg = MO.getReg();
1131 if (!Reg)
1132 continue;
1133
1134 // Things that are available after the instruction are killed by it.
1135 bool IsKill = LiveRegs.available(Reg);
1136
1137 // Exception: Do not kill reserved registers
1138 MO.setIsKill(IsKill && !MRI.isReserved(Reg));
1139 if (addToLiveRegs)
1140 LiveRegs.addReg(Reg);
1141 }
1142}
1143
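// Re-derives kill flags for MBB from scratch: LiveRegUnits is seeded with the
// block's live-outs and the block is scanned bottom-up, rather than trying to
// preserve the original kill flags across scheduling.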
1144void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1145  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1146
1147 LiveRegs.init(*TRI);
1148 LiveRegs.addLiveOuts(MBB);
1149
1150 // Examine block from end to start...
1151 for (MachineInstr &MI : llvm::reverse(MBB)) {
1152 if (MI.isDebugOrPseudoInstr())
1153 continue;
1154
1155 // Update liveness. Registers that are defed but not used in this
1156 // instruction are now dead. Mark register and all subregs as they
1157 // are completely defined.
1158 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1159 const MachineOperand &MO = *O;
1160 if (MO.isReg()) {
1161 if (!MO.isDef())
1162 continue;
1163 Register Reg = MO.getReg();
1164 if (!Reg)
1165 continue;
1166 LiveRegs.removeReg(Reg);
1167 } else if (MO.isRegMask()) {
1168 LiveRegs.removeRegsNotPreserved(MO.getRegMask());
1169 }
1170 }
1171
1172 // If there is a bundle header fix it up first.
1173 if (!MI.isBundled()) {
1174 toggleKills(MRI, LiveRegs, MI, true);
1175 } else {
1176 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1177 if (MI.isBundle())
1178 toggleKills(MRI, LiveRegs, MI, false);
1179
1180      // Some targets make the (questionable) assumption that the instructions
1181 // inside the bundle are ordered and consequently only the last use of
1182 // a register inside the bundle can kill it.
1183 MachineBasicBlock::instr_iterator I = std::next(Bundle);
1184 while (I->isBundledWithSucc())
1185 ++I;
1186 do {
1187 if (!I->isDebugOrPseudoInstr())
1188 toggleKills(MRI, LiveRegs, *I, true);
1189 --I;
1190 } while (I != Bundle);
1191 }
1192 }
1193}
1194
1195void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1196#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1197 dumpNodeName(SU);
1198 if (SchedPrintCycles)
1199 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1200 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1201 dbgs() << ": ";
1202 SU.getInstr()->dump();
1203#endif
1204}
1205
1206void ScheduleDAGInstrs::dump() const {
1207#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1208  if (EntrySU.getInstr() != nullptr)
1209    dumpNodeAll(EntrySU);
1210  for (const SUnit &SU : SUnits)
1211    dumpNodeAll(SU);
1212  if (ExitSU.getInstr() != nullptr)
1213    dumpNodeAll(ExitSU);
1214#endif
1215}
1216
1217std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1218 std::string s;
1219 raw_string_ostream oss(s);
1220 if (SU == &EntrySU)
1221 oss << "<entry>";
1222 else if (SU == &ExitSU)
1223 oss << "<exit>";
1224 else
1225 SU->getInstr()->print(oss, /*IsStandalone=*/true);
1226 return s;
1227}
1228
1229/// Return the basic block label. It is not necessarily unique because a block
1230/// contains multiple scheduling regions. But it is fine for visualization.
1231std::string ScheduleDAGInstrs::getDAGName() const {
1232  return "dag." + BB->getFullName();
1233}
1234
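// An edge making PredSU a predecessor of SuccSU is safe to add when SuccSU is
// the exit node or PredSU is not already reachable from SuccSU, since only
// then can no cycle be created.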
1235bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1236  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1237}
1238
1239bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1240 if (SuccSU != &ExitSU) {
1241 // Do not use WillCreateCycle, it assumes SD scheduling.
1242 // If Pred is reachable from Succ, then the edge creates a cycle.
1243 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1244 return false;
1245 Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1246 }
1247 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1248 // Return true regardless of whether a new edge needed to be inserted.
1249 return true;
1250}
1251
1252//===----------------------------------------------------------------------===//
1253// SchedDFSResult Implementation
1254//===----------------------------------------------------------------------===//
1255
1256namespace llvm {
1257
1258/// Internal state used to compute SchedDFSResult.
1259class SchedDFSImpl {
1260  SchedDFSResult &R;
1261
1262 /// Join DAG nodes into equivalence classes by their subtree.
1263 IntEqClasses SubtreeClasses;
1264 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1265 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1266
1267 struct RootData {
1268 unsigned NodeID;
1269 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1270 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1271 /// children.
1272
1273 RootData(unsigned id): NodeID(id),
1274 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1275
1276 unsigned getSparseSetIndex() const { return NodeID; }
1277 };
1278
1279 SparseSet<RootData> RootSet;
1280
1281public:
1282 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1283 RootSet.setUniverse(R.DFSNodeData.size());
1284 }
1285
1286  /// Returns true if this node has been visited by the DFS traversal.
1287 ///
1288 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1289 /// ID. Later, SubtreeID is updated but remains valid.
1290 bool isVisited(const SUnit *SU) const {
1291 return R.DFSNodeData[SU->NodeNum].SubtreeID
1292 != SchedDFSResult::InvalidSubtreeID;
1293 }
1294
1295 /// Initializes this node's instruction count. We don't need to flag the node
1296 /// visited until visitPostorder because the DAG cannot have cycles.
1297 void visitPreorder(const SUnit *SU) {
1298 R.DFSNodeData[SU->NodeNum].InstrCount =
1299 SU->getInstr()->isTransient() ? 0 : 1;
1300 }
1301
1302 /// Called once for each node after all predecessors are visited. Revisit this
1303 /// node's predecessors and potentially join them now that we know the ILP of
1304 /// the other predecessors.
1305 void visitPostorderNode(const SUnit *SU) {
1306 // Mark this node as the root of a subtree. It may be joined with its
1307 // successors later.
1308 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1309 RootData RData(SU->NodeNum);
1310 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1311
1312 // If any predecessors are still in their own subtree, they either cannot be
1313 // joined or are large enough to remain separate. If this parent node's
1314 // total instruction count is not greater than a child subtree by at least
1315 // the subtree limit, then try to join it now since splitting subtrees is
1316 // only useful if multiple high-pressure paths are possible.
1317 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1318 for (const SDep &PredDep : SU->Preds) {
1319 if (PredDep.getKind() != SDep::Data)
1320 continue;
1321 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1322 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1323 joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1324
1325 // Either link or merge the TreeData entry from the child to the parent.
1326 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1327 // If the predecessor's parent is invalid, this is a tree edge and the
1328 // current node is the parent.
1329 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1330 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1331 }
1332 else if (RootSet.count(PredNum)) {
1333 // The predecessor is not a root, but is still in the root set. This
1334 // must be the new parent that it was just joined to. Note that
1335 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1336 // set to the original parent.
1337 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1338 RootSet.erase(PredNum);
1339 }
1340 }
1341 RootSet[SU->NodeNum] = RData;
1342 }
1343
1344 /// Called once for each tree edge after calling visitPostOrderNode on
1345 /// the predecessor. Increment the parent node's instruction count and
1346 /// preemptively join this subtree to its parent's if it is small enough.
1347 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1348 R.DFSNodeData[Succ->NodeNum].InstrCount
1349 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1350 joinPredSubtree(PredDep, Succ);
1351 }
1352
1353 /// Adds a connection for cross edges.
1354 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1355 ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
1356 }
1357
1358 /// Sets each node's subtree ID to the representative ID and record
1359 /// connections between trees.
1360 void finalize() {
1361 SubtreeClasses.compress();
1362 R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1363 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1364 && "number of roots should match trees");
1365 for (const RootData &Root : RootSet) {
1366 unsigned TreeID = SubtreeClasses[Root.NodeID];
1367 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1368 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1369 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1370 // Note that SubInstrCount may be greater than InstrCount if we joined
1371 // subtrees across a cross edge. InstrCount will be attributed to the
1372 // original parent, while SubInstrCount will be attributed to the joined
1373 // parent.
1374 }
1375 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1376 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1377 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1378 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1379 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1380 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1381 << R.DFSNodeData[Idx].SubtreeID << '\n');
1382 }
1383 for (const auto &[Pred, Succ] : ConnectionPairs) {
1384 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1385 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1386 if (PredTree == SuccTree)
1387 continue;
1388 unsigned Depth = Pred->getDepth();
1389 addConnection(PredTree, SuccTree, Depth);
1390 addConnection(SuccTree, PredTree, Depth);
1391 }
1392 }
1393
1394protected:
1395 /// Joins the predecessor subtree with the successor that is its DFS parent.
1396 /// Applies some heuristics before joining.
1397 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1398 bool CheckLimit = true) {
1399 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1400
1401 // Check if the predecessor is already joined.
1402 const SUnit *PredSU = PredDep.getSUnit();
1403 unsigned PredNum = PredSU->NodeNum;
1404 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1405 return false;
1406
1407 // Four is the magic number of successors before a node is considered a
1408 // pinch point.
1409 unsigned NumDataSucs = 0;
1410 for (const SDep &SuccDep : PredSU->Succs) {
1411 if (SuccDep.getKind() == SDep::Data) {
1412 if (++NumDataSucs >= 4)
1413 return false;
1414 }
1415 }
1416 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1417 return false;
1418 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1419 SubtreeClasses.join(Succ->NodeNum, PredNum);
1420 return true;
1421 }
1422
1423 /// Called by finalize() to record a connection between trees.
1424 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1425 if (!Depth)
1426 return;
1427
1428 do {
1429      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1430          R.SubtreeConnections[FromTree];
1431 for (SchedDFSResult::Connection &C : Connections) {
1432 if (C.TreeID == ToTree) {
1433 C.Level = std::max(C.Level, Depth);
1434 return;
1435 }
1436 }
1437 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1438 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1439 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1440 }
1441};
1442
1443} // end namespace llvm
1444
1445namespace {
1446
1447/// Manage the stack used by a reverse depth-first search over the DAG.
1448class SchedDAGReverseDFS {
1449 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1450
1451public:
1452 bool isComplete() const { return DFSStack.empty(); }
1453
1454 void follow(const SUnit *SU) {
1455 DFSStack.emplace_back(SU, SU->Preds.begin());
1456 }
1457 void advance() { ++DFSStack.back().second; }
1458
1459 const SDep *backtrack() {
1460 DFSStack.pop_back();
1461 return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1462 }
1463
1464 const SUnit *getCurr() const { return DFSStack.back().first; }
1465
1466 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1467
1468 SUnit::const_pred_iterator getPredEnd() const {
1469 return getCurr()->Preds.end();
1470 }
1471};
1472
1473} // end anonymous namespace
1474
1475static bool hasDataSucc(const SUnit *SU) {
1476 for (const SDep &SuccDep : SU->Succs) {
1477 if (SuccDep.getKind() == SDep::Data &&
1478 !SuccDep.getSUnit()->isBoundaryNode())
1479 return true;
1480 }
1481 return false;
1482}
1483
1484/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1485/// search from this root.
1486void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1487  if (!IsBottomUp)
1488 llvm_unreachable("Top-down ILP metric is unimplemented");
1489
1490 SchedDFSImpl Impl(*this);
1491 for (const SUnit &SU : SUnits) {
1492 if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1493 continue;
1494
1495 SchedDAGReverseDFS DFS;
1496 Impl.visitPreorder(&SU);
1497 DFS.follow(&SU);
1498 while (true) {
1499 // Traverse the leftmost path as far as possible.
1500 while (DFS.getPred() != DFS.getPredEnd()) {
1501 const SDep &PredDep = *DFS.getPred();
1502 DFS.advance();
1503 // Ignore non-data edges.
1504 if (PredDep.getKind() != SDep::Data
1505 || PredDep.getSUnit()->isBoundaryNode()) {
1506 continue;
1507 }
1508 // An already visited edge is a cross edge, assuming an acyclic DAG.
1509 if (Impl.isVisited(PredDep.getSUnit())) {
1510 Impl.visitCrossEdge(PredDep, DFS.getCurr());
1511 continue;
1512 }
1513 Impl.visitPreorder(PredDep.getSUnit());
1514 DFS.follow(PredDep.getSUnit());
1515 }
1516 // Visit the top of the stack in postorder and backtrack.
1517 const SUnit *Child = DFS.getCurr();
1518 const SDep *PredDep = DFS.backtrack();
1519 Impl.visitPostorderNode(Child);
1520 if (PredDep)
1521 Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1522 if (DFS.isComplete())
1523 break;
1524 }
1525 }
1526 Impl.finalize();
1527}
1528
1529/// The root of the given SubtreeID was just scheduled. For all subtrees
1530/// connected to this tree, record the depth of the connection so that the
1531/// nearest connected subtrees can be prioritized.
1532void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1533 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1534 SubtreeConnectLevels[C.TreeID] =
1535 std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1536 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1537 << SubtreeConnectLevels[C.TreeID] << '\n');
1538 }
1539}
1540
1541#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1542LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1543  OS << InstrCount << " / " << Length << " = ";
1544 if (!Length)
1545 OS << "BADILP";
1546 else
1547 OS << format("%g", ((double)InstrCount / Length));
1548}
1549
1550LLVM_DUMP_METHOD void ILPValue::dump() const {
1551  dbgs() << *this << '\n';
1552}
1553
1554namespace llvm {
1555
1556LLVM_ATTRIBUTE_UNUSED
1557raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1558  Val.print(OS);
1559 return OS;
1560}
1561
1562} // end namespace llvm
1563
1564#endif
unsigned SubReg
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:298
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
bool End
Definition: ELF_riscv.cpp:480
static Register UseReg(const MachineOperand &MO)
hexagon widen Hexagon Store false hexagon widen loads
hexagon widen stores
IRTranslator LLVM IR MI
Equivalence classes for small integers.
A common definition of LaneBitmask for use in TableGen and CodeGen.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
#define P(N)
raw_pwrite_stream & OS
static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
static bool hasDataSucc(const SUnit *SU)
static cl::opt< bool > EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup"))
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::desc("Enable use of AA during MI DAG construction"))
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
static unsigned getReductionSize()
static void dumpSUList(const ScheduleDAGInstrs::SUList &L)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< bool > EnableSchedItins("scheditins", cl::Hidden, cl::init(true), cl::desc("Use InstrItineraryData for latency lookup"))
static cl::opt< bool > SchedPrintCycles("sched-print-cycles", cl::Hidden, cl::init(false), cl::desc("Report top/bottom cycles when dumping SUnit instances"))
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
void reComputeSize()
Counts the number of SUs in this map after a reduction.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
void clear()
Clears map from all contents.
void clearList(ValueType V)
Clears the list of SUs mapped to V.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
A private abstract base class describing the concept of an individual alias analysis implementation.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
LLVM_ABI void compress()
compress - Compress equivalence classes by numbering them 0 .
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:73
LLVM_ABI unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:238
bool hasImplicitUseOfPhysReg(MCRegister Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:589
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:32
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register information).
Instructions::iterator instr_iterator
LLVM_ABI std::string getFullName() const
Return a formatted string to identify this block and its parent function.
iterator_range< succ_iterator > successors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasTailCall() const
Returns true if the function contains a tail call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Representation of each machine instruction.
Definition: MachineInstr.h:72
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:965
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:948
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
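A small, hedged sketch of how such an aliasing query might drive a chain-edge decision; needsChainEdge and its parameters are illustrative helpers, not part of this file.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical helper: two memory instructions need an ordering (chain) edge
// when their accesses may alias. A null BatchAAResults makes the query more
// conservative.
static bool needsChainEdge(BatchAAResults *AA, const MachineInstr &MIa,
                           const MachineInstr &MIb, bool UseTBAA) {
  return MIa.mayAlias(AA, MIb, UseTBAA);
}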
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:773
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:584
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
Definition: MachineInstr.h:764
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during register allocation (such as copy-like instructions), or if this instruction doesn't have an execution-time cost.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
LLVM_ABI bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
typename VectorType::iterator iterator
Definition: MapVector.h:42
iterator end()
Definition: MapVector.h:67
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:94
iterator find(const KeyT &Key)
Definition: MapVector.h:141
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
iterator begin()
Definition: MapVector.h:65
Array of PressureDiffs.
LLVM_ABI void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
LLVM_ABI void init(unsigned N)
Initialize an array of N PressureDiffs.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Scheduling dependency.
Definition: ScheduleDAG.h:51
SUnit * getSUnit() const
Definition: ScheduleDAG.h:507
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:513
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:54
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:57
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:55
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:74
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:72
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
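A hedged sketch of constructing one of these edges; the register number and latency are placeholders supplied by the caller, not values from this file.
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

// Hypothetical helper: record a true data dependence from DefSU to UseSU on
// register Reg and give the edge an explicit latency.
static void addDataEdge(SUnit *DefSU, SUnit *UseSU, unsigned Reg,
                        unsigned Latency) {
  SDep Dep(DefSU, SDep::Data, Reg); // Data/Anti/Output edges carry a register
  Dep.setLatency(Latency);
  UseSU->addPred(Dep);              // UseSU now lists DefSU among its Preds
}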
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
bool isCall
Is a function call.
Definition: ScheduleDAG.h:296
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:411
unsigned NumSuccs
Definition: ScheduleDAG.h:280
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:285
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:277
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:309
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:274
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:391
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:312
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:367
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:301
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:286
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:270
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:310
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:299
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:300
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:269
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:399
Internal state used to compute SchedDFSResult.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
SchedDFSImpl(SchedDFSResult &r)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx is mapped to an SUnit.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the same scheduling region if they read or write the register defined at OperIdx.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not create the necessary dependencies to prevent reordering.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the same scheduling region that depend on the physical register referenced at OperIdx.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches the scheduling class for an SUnit.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
BatchAAResults * getAAForDep() const
Returns a (possibly null) pointer to the current BatchAAResults.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
RegUnit2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
std::optional< BatchAAResults > AAForDep
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to the scheduling barrier.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
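A hedged sketch of the per-region call sequence a driver typically follows around these entry points; scheduleOneRegion and its parameters are illustrative, and the actual reordering step is elided.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;

// Hypothetical driver for one scheduling region.
static void scheduleOneRegion(ScheduleDAGInstrs &DAG, MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator RegionBegin,
                              MachineBasicBlock::iterator RegionEnd,
                              unsigned NumRegionInstrs, AAResults *AA) {
  DAG.startBlock(MBB);
  DAG.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  DAG.buildSchedGraph(AA);   // create SUnits and dependence edges
  // ... run the scheduling algorithm on the built DAG here ...
  DAG.exitRegion();
  DAG.finishBlock();
}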
void dump() const override
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the dependency.
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
const MachineFrameInfo & MFI
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:804
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
LLVM_ABI void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:587
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:63
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:584
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:585
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:589
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:586
void dumpNodeAll(const SUnit &SU) const
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:590
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:66
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:194
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:293
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:245
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:160
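A minimal, hypothetical SparseSet usage sketch; the universe size and the keys inserted are made up for illustration.
#include "llvm/ADT/SparseSet.h"
using namespace llvm;

// Hypothetical example: constant-time membership tests over small unsigned
// keys, e.g. register units.
void sparseSetExample(unsigned NumRegUnits) {
  if (NumRegUnits <= 7)
    return;                       // keys below must stay within the universe
  SparseSet<unsigned> Units;
  Units.setUniverse(NumRegUnits); // must be set before inserting any key
  Units.insert(3);
  Units.insert(7);
  if (Units.count(3))             // 1 if present, 0 otherwise
    Units.erase(Units.find(3));
  Units.clear();
}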
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
LLVM_ABI unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
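A hedged sketch of using these latency queries to label a data edge; setDataEdgeLatency is an illustrative helper, and the surrounding context that supplies DefMI/UseMI and the operand indices is assumed.
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"
using namespace llvm;

// Hypothetical helper: derive a data edge's latency from the machine model.
static void setDataEdgeLatency(const TargetSchedModel &SchedModel, SDep &Dep,
                               const MachineInstr *DefMI, unsigned DefOpIdx,
                               const MachineInstr *UseMI, unsigned UseOpIdx) {
  Dep.setLatency(
      SchedModel.computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}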
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
'undef' values are things that do not have specified contents.
Definition: Constants.h:1420
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:75
int getNumOccurrences() const
Definition: CommandLine.h:400
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Length
Definition: DWP.cpp:477
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
LLVM_ABI bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
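A small, hedged sketch of calling it; collectUnderlyingObjects is an illustrative wrapper, and Ptr is assumed to be the pointer Value behind some memory access.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical use: collect identifiable underlying objects so memory
// accesses can be keyed by them; a false return means no usable objects
// could be identified.
static bool collectUnderlyingObjects(const Value *Ptr,
                                     SmallVectorImpl<Value *> &Objects) {
  return getUnderlyingObjectsForCodeGen(Ptr, Objects);
}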
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
void print(raw_ostream &OS) const
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
constexpr bool any() const
Definition: LaneBitmask.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:68
Record a physical register access.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249
Mapping from virtual register to SUnit including an operand index.
An individual mapping from virtual register number to SUnit.